PackedLLM.py · HiMind/PackedLLM at main

PackedLLM / PackedLLM.py

Upload 5 files

299b8f3 verified 11 days ago

300 kB

	from __future__ import annotations
	'''PackedLLM, By: Chance Brownfield-|-HiMindAi@proton.me'''
	import ast
	import argparse
	import base64
	import contextlib
	import dataclasses
	import hashlib
	import importlib
	import importlib.util
	import io
	import json
	import lzma
	import math
	import os
	import platform
	import queue
	import re
	import shutil
	import subprocess
	import sys
	import tempfile
	import textwrap
	import threading
	import time
	import traceback
	import types
	import uuid
	import zipfile
	from collections import OrderedDict, defaultdict, deque
	from dataclasses import dataclass, field, asdict
	from datetime import datetime
	from pathlib import Path
	from typing import (
	Any, Callable, Dict, Iterable, List, Mapping,
	MutableMapping, Optional, Sequence, Tuple, Union
	)
	from urllib.parse import urlparse, parse_qs, quote, unquote
	from multiprocessing import Process, Queue, get_context
	import concurrent.futures
	import numpy as np
	import psutil
	import requests
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from torch import Tensor
	try:
	from transformers import (
	MarianMTModel,
	MarianTokenizer,
	AutoModelForSeq2SeqLM,
	AutoTokenizer,
	)
	_HAS_TRANSFORMERS = True
	except ImportError:
	_HAS_TRANSFORMERS = False
	MarianMTModel = MarianTokenizer = None
	AutoModelForSeq2SeqLM = AutoTokenizer = None

	try:
	from sentence_transformers import SentenceTransformer, util
	_HAS_SENTENCE_TRANSFORMERS = True
	except ImportError:
	_HAS_SENTENCE_TRANSFORMERS = False
	try:
	import fitz # PyMuPDF
	_HAS_PYMUPDF = True
	except ImportError:
	_HAS_PYMUPDF = False
	try:
	from sklearn.cluster import KMeans, AgglomerativeClustering
	from sklearn.decomposition import PCA
	from sklearn.metrics.pairwise import cosine_similarity
	_HAS_SKLEARN = True
	except ImportError:
	_HAS_SKLEARN = False
	KMeans = AgglomerativeClustering = PCA = cosine_similarity = None
	try:
	import spacy
	_HAS_SPACY = True
	except ImportError as e:
	raise RuntimeError(f"spaCy is required: {e}")
	try:
	from bs4 import BeautifulSoup
	except ImportError:
	BeautifulSoup = None
	try:
	import trafilatura
	except ImportError:
	trafilatura = None
	try:
	from readability import Document
	except ImportError:
	Document = None
	try:
	from newspaper import Article
	except ImportError:
	Article = None
	try:
	from goose3 import Goose
	except ImportError:
	Goose = None
	try:
	from boilerpy3 import extractors
	except ImportError:
	extractors = None
	try:
	from inscriptis import get_text as inscriptis_text
	except ImportError:
	inscriptis_text = None
	try:
	from lxml import html as lxml_html
	except ImportError:
	lxml_html = None
	try:
	from youtube_transcript_api import YouTubeTranscriptApi
	except ImportError:
	YouTubeTranscriptApi = None
	try:
	from llama_cpp import Llama
	_HAS_LLAMA_CPP = True
	except ImportError:
	_HAS_LLAMA_CPP = False
	Llama = None
	try:
	import wgpu
	_HAS_WGPU = True
	except ImportError:
	_HAS_WGPU = False
	try:
	from huggingface_hub import snapshot_download
	_HAS_HF_HUB = True
	except ImportError:
	_HAS_HF_HUB = False
	try:
	from safetensors.torch import load_file as safetensors_load
	except ImportError:
	safetensors_load = None
	MODEL_DIR = Path("models")
	EMBEDDING_PATH = MODEL_DIR / "all-MiniLM-L6-v2"
	SUMMARIZER_PATH = MODEL_DIR / "distilbart-cnn-12-6"
	SPACY_MODEL_PATH = MODEL_DIR / "spacy" / "en_core_web_sm"
	SPACY_MODEL_NAME = "en_core_web_sm"
	DEFAULT_CHECKPOINT_PATH = "LM.pt"
	DEFAULT_BUNDLE_PATH = "PackedLM.pt"
	DEFAULT_IMAGE_TEST_SOURCE = "sample_img.png"
	DEFAULT_ZH_EN_DIR = MODEL_DIR / "opus-mt-zh-en"
	_CHUNK_BYTES = 32 * 1024 * 1024
	_CODE_FENCE_RE = re.compile(r"```(?:python)?\s\n(.?)```", re.DOTALL \| re.IGNORECASE)
	CHINESE_RE = re.compile(r"[\u4e00-\u9fff]")
	CHINESE_SPAN_RE = re.compile(
	r"[\u4e00-\u9fff]+(?:[\u3000-\u303F\uFF00-\uFFEF\u2000-\u206F"
	r"\u2E00-\u2E7F\u3000-\u303F\uFF00-\uFFEF\s,.;:!?\-—()\[\]{}，。！？、；：]+"
	r"[\u4e00-\u9fff]+)*"
	)
	GGUF_EMBED_FILENAME = "jina-embeddings-v3-Q8_0.gguf"
	for p in [MODEL_DIR, EMBEDDING_PATH, SUMMARIZER_PATH, SPACY_MODEL_PATH.parent]:
	p.mkdir(parents=True, exist_ok=True)

	def _extract_python_code(text: str) -> str:
	    """Pull parseable Python out of text that may wrap it in Markdown fences.

	    Each fenced block (or the whole text when there are no fences) is passed
	    through ``_strip_to_valid_python``; the first non-empty result wins.
	    Non-string input yields ``""``. When nothing parses, the stripped input
	    text is returned unchanged.
	    """
	    if not isinstance(text, str):
	        return ""
	    text = text.strip()

	    fenced_bodies = [body.strip() for body in _CODE_FENCE_RE.findall(text)]
	    for snippet in (fenced_bodies or [text]):
	        usable = _strip_to_valid_python(snippet)
	        if usable:
	            return usable
	    return text

	def _strip_to_valid_python(code: str) -> str:
	try:
	ast.parse(code)
	return code
	except SyntaxError:
	pass

	lines = code.splitlines()

	for start in range(1, min(len(lines), 10) + 1):
	candidate = "\n".join(lines[start:])
	try:
	ast.parse(candidate)
	return candidate
	except SyntaxError:
	continue

	for end in range(len(lines) - 1, max(len(lines) - 10, 0), -1):
	candidate = "\n".join(lines[:end])
	try:
	ast.parse(candidate)
	return candidate
	except SyntaxError:
	continue

	return ""

	def _json_dumps(obj: Any) -> str:
	return json.dumps(obj, ensure_ascii=False, default=str)

	def _parse_json_safe(text: Any) -> Any:
	if not isinstance(text, str):
	return None

	cleaned = text.strip()
	if cleaned.startswith("```"):
	lines = cleaned.splitlines()
	if lines and lines[-1].strip() == "```":
	cleaned = "\n".join(lines[1:-1])
	else:
	cleaned = "\n".join(lines[1:])

	try:
	return json.loads(cleaned)
	except Exception:
	for start_char, end_char in [("{", "}"), ("[", "]")]:
	si = cleaned.find(start_char)
	ei = cleaned.rfind(end_char)
	if si != -1 and ei != -1 and ei > si:
	try:
	return json.loads(cleaned[si:ei + 1])
	except Exception:
	pass
	return None

	def _safe_import_class(name: str) -> Optional[type]:
	try:
	frame = inspect.stack()[2].frame
	cls = frame.f_globals.get(name) or builtins.__dict__.get(name)
	return cls if isinstance(cls, type) else None
	except Exception:
	return None

	def _safe_call(obj: Any, name: str, args: Any, default: Any = None, *kwargs: Any) -> Any:
	fn = getattr(obj, name, None)
	if not callable(fn):
	return default
	try:
	return fn(args, *kwargs)
	except Exception:
	return default

	def _bytes_to_chunks(data: bytes, chunk_size: int = _CHUNK_BYTES) -> List[bytes]:
	    """Split ``data`` into consecutive slices of at most ``chunk_size`` bytes.

	    Empty input produces ``[b""]`` rather than an empty list.
	    """
	    # max(..., 1) makes range() yield offset 0 even when data is empty.
	    span = max(len(data), 1)
	    pieces: List[bytes] = []
	    for offset in range(0, span, chunk_size):
	        pieces.append(data[offset: offset + chunk_size])
	    return pieces

	def _chunks_to_bytes(chunks: List[bytes]) -> bytes:
	return b"".join(chunks)

	def _read_file_chunked(path: Optional[Union[str, Path]]) -> Optional[List[bytes]]:
	if not path:
	return None
	p = Path(path)
	if not p.exists():
	return None
	chunks: List[bytes] = []
	try:
	with open(p, "rb") as fh:
	while True:
	chunk = fh.read(_CHUNK_BYTES)
	if not chunk:
	break
	chunks.append(chunk)
	except Exception:
	return None
	return chunks if chunks else [b""]

	def _write_chunks_to_temp(chunks: Optional[List[bytes]], suffix: str, prefix: str = "packedllm_") -> Optional[str]:
	if not chunks:
	return None
	fd, path = tempfile.mkstemp(prefix=prefix, suffix=suffix)
	os.close(fd)
	try:
	with open(path, "wb") as fh:
	for chunk in chunks:
	fh.write(chunk)
	except Exception:
	try:
	os.unlink(path)
	except Exception:
	pass
	return None
	return path

	def _normalise_expert_name(name: str) -> str:
	s = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", name)
	return s.lower()

	def _expert_names_canonical(names: List[str]) -> List[str]:
	    """Normalise every name and drop duplicates, keeping first-seen order."""
	    # dict preserves insertion order, so fromkeys() de-duplicates stably.
	    ordered_unique = dict.fromkeys(_normalise_expert_name(raw) for raw in names)
	    return list(ordered_unique)

	def _query_nvidia_smi() -> Optional[Dict[str, Any]]:
	    """Return memory/utilisation figures for the first GPU listed by ``nvidia-smi``.

	    Returns ``None`` when the tool is missing, fails, or prints something
	    that cannot be parsed.
	    """
	    cmd = [
	        "nvidia-smi",
	        "--query-gpu=name,memory.total,memory.free,memory.used,utilization.gpu",
	        "--format=csv,noheader,nounits",
	    ]
	    try:
	        output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("ascii").strip()
	        if not output:
	            return None
	        # nvidia-smi prints one CSV row per GPU. Splitting the whole output on
	        # commas merged rows on multi-GPU hosts, float() then failed and the
	        # GPU was reported as absent. Only the first row is used.
	        first_row = output.splitlines()[0]
	        parts = [p.strip() for p in first_row.split(",")]
	        return {
	            "driver_detected": True,
	            "device_name": parts[0],
	            # Values are divided by 1024, i.e. treated as MiB and reported as GiB.
	            "total_vram_gb": float(parts[1]) / 1024.0,
	            "free_vram_gb": float(parts[2]) / 1024.0,
	            "used_vram_gb": float(parts[3]) / 1024.0,
	            "gpu_utilization_percent": float(parts[4]),
	        }
	    except Exception:
	        # Telemetry is best-effort; a missing GPU or driver is not an error.
	        return None

	def capture_telemetry() -> Dict[str, Any]:
	    """Snapshot CPU, RAM and (when available) NVIDIA GPU usage.

	    Returns a dict with keys ``timestamp_ns`` (``time.perf_counter_ns()``, a
	    monotonic counter rather than wall-clock time), ``cpu``, ``ram`` and
	    ``gpu_hardware_metrics``. The GPU section keeps zeroed defaults with
	    ``driver_detected=False`` when ``nvidia-smi`` cannot be queried.
	    """
	    process = psutil.Process(os.getpid())
	    cpu_total_pct = psutil.cpu_percent(interval=None)
	    cpu_process_pct = process.cpu_percent(interval=None)
	    ram_info = psutil.virtual_memory()
	    gib = 1024 ** 3

	    metrics: Dict[str, Any] = {
	        "timestamp_ns": time.perf_counter_ns(),
	        "cpu": {
	            "system_total_percent": cpu_total_pct,
	            "process_percent": cpu_process_pct,
	        },
	        "ram": {
	            "system_total_gb": ram_info.total / gib,
	            "system_available_gb": ram_info.available / gib,
	            "system_used_gb": ram_info.used / gib,
	            "process_rss_gb": process.memory_info().rss / gib,
	        },
	        "gpu_hardware_metrics": {
	            "driver_detected": False,
	            "device_name": "None",
	            "total_vram_gb": 0.0,
	            "used_vram_gb": 0.0,
	            "free_vram_gb": 0.0,
	            "gpu_utilization_percent": 0.0,
	        },
	    }

	    gpu_metrics = _query_nvidia_smi()
	    if gpu_metrics is not None:
	        metrics["gpu_hardware_metrics"] = gpu_metrics

	    return metrics

	def calculate_delta(start: Dict[str, Any], end: Dict[str, Any]) -> Dict[str, Any]:
	    """Compare two ``capture_telemetry``-shaped snapshots.

	    RAM and CPU figures are ``end - start``. The VRAM delta and the GPU
	    utilisation (taken from ``end`` only) are reported as ``0.0`` when the
	    ``end`` snapshot has no detected GPU driver.
	    """
	    gpu_before = start["gpu_hardware_metrics"]
	    gpu_after = end["gpu_hardware_metrics"]

	    if gpu_after["driver_detected"]:
	        vram_change = gpu_after["used_vram_gb"] - gpu_before["used_vram_gb"]
	        utilisation = gpu_after["gpu_utilization_percent"]
	    else:
	        vram_change = 0.0
	        utilisation = 0.0

	    ram_change = end["ram"]["process_rss_gb"] - start["ram"]["process_rss_gb"]
	    cpu_change = end["cpu"]["system_total_percent"] - start["cpu"]["system_total_percent"]

	    return {
	        "ram_process_delta_gb": ram_change,
	        "vram_allocated_delta_gb": vram_change,
	        "gpu_instantaneous_utilization_pct": utilisation,
	        "cpu_system_delta_pct": cpu_change,
	    }

	def normalize_unicode(s: str) -> str:
	    """NFKC-normalise ``s`` and strip invisible and control characters.

	    Removes zero-width and directional marks (U+200B-U+200F), the BOM
	    (U+FEFF), the soft hyphen (U+00AD) and all C0 controls plus DEL. Note
	    that the control range includes tab and newline.
	    """
	    # Local import: ``unicodedata`` is not among the module-level imports
	    # visible at the top of the file, so the call raised NameError.
	    import unicodedata

	    s = unicodedata.normalize("NFKC", s)
	    s = re.sub(r"[\u200B-\u200F\uFEFF\u00AD]", "", s)
	    s = re.sub(r"[\x00-\x1F\x7F]", "", s)
	    return s

	def canonicalize_numbers(s: str) -> str:
	    """Replace every decimal or integer literal in ``s`` with the placeholder ``N``.

	    The decimal alternative comes first so ``3.14`` becomes one ``N`` rather
	    than ``N.N``.
	    """
	    # The alternation bar had been escaped (``\|``), which made the pattern
	    # look for a literal "|" between two numbers and match nothing useful.
	    return re.sub(r"\d+\.\d+|\d+", "N", s)

	def strip_latex_wrappers(s: str) -> str:
	    r"""Remove LaTeX math delimiters from ``s``, keeping the enclosed content.

	    Strips ``\[``, ``\]``, ``\(``, ``\)`` and every ``$``. The sequence
	    ``\$`` is removed as a whole, as the previous implementation did.
	    """
	    # NOTE(review): the second replace pair previously read "\$" twice (an
	    # invalid escape sequence); that looks like a mangled "\\(" / "\\)" pair,
	    # so inline-math delimiters are handled here. Confirm against upstream.
	    for delimiter in ("\\[", "\\]", "\\(", "\\)", "\\$"):
	        s = s.replace(delimiter, "")
	    return s.replace("$", "")

	def semantic_key(line: str) -> str:
	    r"""Reduce ``line`` to a canonical form for near-duplicate detection.

	    Steps: trim and lowercase, Unicode-normalise, strip LaTeX delimiters,
	    rewrite ``\frac{a}{b}`` as ``frac(a,b)`` and ``^{x}`` as ``^(x)``, replace
	    numbers with ``N`` and collapse whitespace.
	    """
	    line = line.strip().lower()
	    line = normalize_unicode(line)
	    line = strip_latex_wrappers(line)
	    # Both patterns had lost their "*" quantifiers, so they only matched
	    # single-character groups. The lazy group in the exponent pattern lets the
	    # trailing \s* absorb padding inside the braces.
	    line = re.sub(r"\\frac\{([^}]*)\}\{([^}]*)\}", r"frac(\1,\2)", line)
	    line = re.sub(r"\^\{\s*([^}]*?)\s*\}", r"^(\1)", line)
	    line = canonicalize_numbers(line)
	    line = re.sub(r"\s+", " ", line)
	    return line.strip()

	def collapse_repeated_blocks_with_report(text: str, block_size: int = 2) -> Tuple[str, List[str]]:
	    """Drop lines that start a ``block_size``-line window already seen earlier.

	    Blank lines are discarded first. Windows are compared by the
	    ``semantic_key`` of each line. On a repeat, the whole window is added to
	    the report but the scan only advances by one line, so the remaining
	    lines of that window are re-examined and may still be kept. A final
	    window shorter than ``block_size`` is kept as is.

	    Returns ``(collapsed_text, reported_lines)``.
	    """
	    rows = [row for row in text.splitlines() if row.strip()]
	    kept: List[str] = []
	    reported: List[str] = []
	    known_windows = set()
	    cursor = 0
	    while cursor < len(rows):
	        window = rows[cursor:cursor + block_size]
	        signature = tuple(semantic_key(row) for row in window)
	        if len(signature) < block_size:
	            # Tail too short to form a full window: keep the remainder.
	            kept.extend(rows[cursor:])
	            break
	        if signature in known_windows:
	            reported.extend(window)
	            cursor += 1
	        else:
	            known_windows.add(signature)
	            kept.extend(window)
	            cursor += block_size
	    return "\n".join(kept), reported

	def collapse_repeated_semantic_lines_with_report(text: str, max_repeat: int = 1) -> Tuple[str, List[str]]:
	    """Cap runs of consecutive lines that share a ``semantic_key``.

	    Within a run, the first line plus up to ``max_repeat`` repeats are kept;
	    further repeats are dropped and reported. A blank line is always kept
	    and ends the current run.

	    Returns ``(collapsed_text, removed_lines)``.
	    """
	    kept: List[str] = []
	    dropped: List[str] = []
	    last_key = None
	    streak = 0
	    for row in text.splitlines():
	        if not row.strip():
	            kept.append(row)
	            last_key, streak = None, 0
	            continue

	        key = semantic_key(row)
	        if key != last_key:
	            # New run starts here.
	            last_key, streak = key, 0
	            kept.append(row)
	            continue

	        streak += 1
	        if streak > max_repeat:
	            dropped.append(row)
	        else:
	            kept.append(row)
	    return "\n".join(kept), dropped

	def collapse_repeated_lines(text: str, block_size: int = 2, max_repeat: int = 1, passes: int = 2, verbose: bool = True) -> str:
	    """Run the block-level and line-level de-duplication passes over ``text``.

	    Each pass applies ``collapse_repeated_blocks_with_report`` followed by
	    ``collapse_repeated_semantic_lines_with_report``; at least one pass is
	    always run. With ``verbose`` set, a summary and up to 50 distinct
	    reported lines are printed. Returns the stripped result.
	    """
	    result = text or ""
	    reported: List[str] = []

	    pass_count = max(1, int(passes))
	    for _ in range(pass_count):
	        result, from_blocks = collapse_repeated_blocks_with_report(result, block_size=block_size)
	        reported.extend(from_blocks)
	        result, from_lines = collapse_repeated_semantic_lines_with_report(result, max_repeat=max_repeat)
	        reported.extend(from_lines)

	    # Order-preserving de-duplication of the report.
	    unique_removed = list(dict.fromkeys(reported))

	    if verbose:
	        if not unique_removed:
	            print("[collapse_repeated_lines] No repeated blocks or semantic-line repeats detected.")
	        else:
	            print(f"[collapse_repeated_lines] Removed {len(unique_removed)} unique repeated line(s)/block(s). Examples:")
	            for ex in unique_removed[:50]:
	                print(f"- {ex}")

	    return result.strip()
	def _now_iso() -> str:
	return datetime.utcnow().isoformat()

	def _norm_ws(text: str) -> str:
	return re.sub(r"\s+", " ", (text or "")).strip()

	def _safe_json_dumps(obj: Any) -> str:
	return json.dumps(obj, ensure_ascii=False, sort_keys=True, separators=(",", ":"))

	def _safe_json_loads(text: str, default: Any = None) -> Any:
	try:
	return json.loads(text)
	except Exception:
	return default

	def _maybe_list(x: Any) -> List[Any]:
	if x is None:
	return []
	if isinstance(x, list):
	return x
	if isinstance(x, tuple):
	return list(x)
	return [x]

	def _normalize_whitespace(text: str) -> str:
	return re.sub(r"\s+", " ", text or "").strip()

	def split_sentences(text: str) -> List[str]:
	    """Split whitespace-normalised ``text`` after ``.``, ``!`` or ``?`` followed by whitespace.

	    Returns an empty list for empty input; empty fragments are discarded.
	    """
	    text = _normalize_whitespace(text)
	    if not text:
	        return []
	    sentences: List[str] = []
	    for fragment in re.split(r"(?<=[.!?])\s+", text):
	        trimmed = fragment.strip()
	        if trimmed:
	            sentences.append(trimmed)
	    return sentences

	def _safe_hash(text: str) -> int:
	    """Return the built-in ``hash`` of the whitespace-normalised ``text``.

	    Being the built-in string hash, the value is not guaranteed to be stable
	    between interpreter runs unless hash randomisation is disabled.
	    """
	    canonical = _normalize_whitespace(text)
	    return hash(canonical)

	def _ensure_min_text(text: str, fallback: str = "") -> str:
	    """Return normalised ``text``, or normalised ``fallback`` when ``text`` normalises to empty."""
	    return _normalize_whitespace(text) or _normalize_whitespace(fallback)

	class FileManager:
	    """File helpers rooted at ``base_dir`` with a shared ``global_data`` folder."""

	    def __init__(self, base_dir: str):
	        """Resolve ``base_dir`` and create ``<base_dir>/global_data`` if missing."""
	        root = os.path.abspath(base_dir)
	        self.base_dir = root
	        self.global_dir = os.path.join(root, "global_data")
	        os.makedirs(self.global_dir, exist_ok=True)

	    def abs_path(self, path: str) -> str:
	        """Return the absolute form of ``path``."""
	        return os.path.abspath(path)

	    def copy_to_global(self, src: str, dest_name: Optional[str] = None) -> str:
	        """Copy ``src`` into the global folder and return the destination path.

	        Nothing is copied when ``src`` already is the destination or when a
	        file with the destination name already exists (it is not overwritten).
	        """
	        target_name = dest_name or os.path.basename(src)
	        target = os.path.join(self.global_dir, target_name)
	        same_file = os.path.abspath(src) == os.path.abspath(target)
	        if not same_file and not os.path.exists(target):
	            shutil.copy2(src, target)
	        return target

	    def write_bytes_to_global(self, data: bytes, dest_name: str) -> str:
	        """Write ``data`` to ``dest_name`` in the global folder, replacing any existing file."""
	        target = os.path.join(self.global_dir, dest_name)
	        with open(target, "wb") as sink:
	            sink.write(data)
	        return target

	    def hardlink_or_copy(self, src: str, dst: str):
	        """Hard-link ``src`` to ``dst``; fall back to a metadata-preserving copy on any failure."""
	        try:
	            os.link(src, dst)
	        except Exception:
	            shutil.copy2(src, dst)

	    def atomic_replace(self, src_tmp: str, dest: str):
	        """Move ``src_tmp`` over ``dest`` using ``os.replace``."""
	        os.replace(src_tmp, dest)

	    def compute_sha256(self, path: str) -> str:
	        """Return the hex SHA-256 digest of the file at ``path``, read in 1 MiB blocks."""
	        digest = hashlib.sha256()
	        block_size = 1 << 20
	        with open(path, "rb") as source:
	            while True:
	                block = source.read(block_size)
	                if block == b"":
	                    break
	                digest.update(block)
	        return digest.hexdigest()

	class CodeBoxError(Exception):
	    """Base class for errors raised by the CodeBox machinery."""

	class AssetNotFoundError(CodeBoxError):
	    """Raised when an asset alias is not present in the registry."""

	class RunnerCacheError(CodeBoxError):
	    """Error type reserved for runner-cache failures."""

	class LRUCache:
	    """Small least-recently-used cache that releases resources held by evicted values."""

	    def __init__(self, capacity: int = 2):
	        """Create an empty cache holding at most ``capacity`` entries."""
	        self.capacity = capacity
	        self.cache = OrderedDict()

	    def get(self, key):
	        """Return the value for ``key`` and mark it most recently used; ``None`` if absent."""
	        if key in self.cache:
	            self.cache.move_to_end(key)
	            return self.cache[key]
	        return None

	    def put(self, key, value):
	        """Insert or refresh ``key``; evict the oldest entry when over capacity.

	        An evicted value has ``close()`` and ``cleanup()`` called when it
	        offers them (errors ignored), after which the CUDA allocator cache
	        is emptied if torch can be imported.
	        """
	        already_present = key in self.cache
	        self.cache[key] = value
	        if already_present:
	            self.cache.move_to_end(key)
	            return
	        if len(self.cache) <= self.capacity:
	            return

	        # popitem(last=False) removes the entry at the front, i.e. the least recently used.
	        _, evicted = self.cache.popitem(last=False)
	        try:
	            if hasattr(evicted, "close"):
	                evicted.close()
	            if hasattr(evicted, "cleanup"):
	                evicted.cleanup()
	        except Exception:
	            pass
	        try:
	            import torch
	            del evicted
	            torch.cuda.empty_cache()
	        except Exception:
	            pass

	    def keys(self):
	        """Return the cached keys, least recently used first."""
	        return [key for key in self.cache]

	    def clear(self):
	        """Drop every entry (without closing values) and empty the CUDA allocator cache."""
	        self.cache.clear()
	        try:
	            import torch
	            torch.cuda.empty_cache()
	        except Exception:
	            pass

	# The CodeBox
	class CodeBox:
	    """Run Python snippets in managed virtualenvs with a shared, persisted asset registry.

	    Instance state:
	      * ``env_dic``        -- venv id -> {"path": ..., "packages": [...]}
	      * ``code_bank``      -- script id -> {"venv_id": ..., "filepath": ...}
	      * ``asset_registry`` -- alias -> asset entry, persisted to ``assets.pt``
	        inside the file manager's global folder.
	    """

	    ASSETS_FILENAME = "assets.pt"
	    ASSET_SCHEMA_VERSION = 1

	    def __init__(self, base_dir: str = "./codebox_storage", runner_cache_capacity: int = 2):
	        """Create the storage layout under ``base_dir`` and load any saved registry."""
	        self.base_dir = os.path.abspath(base_dir)
	        self.envs_dir = os.path.join(self.base_dir, "envs")
	        self.file_manager = FileManager(self.base_dir)
	        os.makedirs(self.envs_dir, exist_ok=True)
	        self.env_dic: Dict[str, Dict[str, Any]] = {}
	        self.code_bank: Dict[str, Dict[str, Any]] = {}
	        self.asset_registry: Dict[str, Dict[str, Any]] = {}
	        self._load_registry()
	        self._runner_cache = LRUCache(capacity=runner_cache_capacity)
	        self._ensure_loader_template()

	    # ------------------------------------------------------------------ registry

	    def _registry_path(self) -> str:
	        """Return the path of the persisted registry file."""
	        return os.path.join(self.file_manager.global_dir, self.ASSETS_FILENAME)

	    def _persist_registry(self):
	        """Write the registry to a temp file, then atomically swap it into place."""
	        tmp_fd, tmp_path = tempfile.mkstemp(dir=self.file_manager.global_dir)
	        os.close(tmp_fd)
	        payload = {
	            "schema_version": self.ASSET_SCHEMA_VERSION,
	            "assets": self.asset_registry,
	        }
	        try:
	            torch.save(payload, tmp_path)
	            self.file_manager.atomic_replace(tmp_path, self._registry_path())
	        except Exception:
	            # Do not leave an orphaned temp file in the global folder.
	            with contextlib.suppress(OSError):
	                os.unlink(tmp_path)
	            raise

	    def _load_registry(self):
	        """Load the registry from disk; an unreadable file is renamed aside and reset."""
	        path = self._registry_path()
	        if not os.path.exists(path):
	            self.asset_registry = {}
	            return
	        try:
	            payload = torch.load(path)
	            if isinstance(payload, dict) and "assets" in payload:
	                self.asset_registry = payload["assets"]
	            else:
	                # Payload without the schema wrapper: used as the registry itself.
	                self.asset_registry = payload
	        except Exception:
	            corrupted = path + f".corrupt.{int(time.time())}"
	            shutil.move(path, corrupted)
	            self.asset_registry = {}

	    def register_asset(self, alias: str, file_path: Optional[str] = None,
	                       asset_type: str = "bin", metadata: Optional[Dict] = None,
	                       embed_bytes: Optional[bytes] = None, force: bool = False) -> Dict[str, Any]:
	        """Register an asset under ``alias`` and persist the registry.

	        Args:
	            alias: Registry key.
	            file_path: Existing file to copy into the global folder.
	            asset_type: Loader hint stored in the entry ("bin", "pt", "json", ...).
	            metadata: Free-form dict stored with the entry.
	            embed_bytes: Raw bytes written to ``<alias>.embedded``; takes
	                precedence over ``file_path``.
	            force: Re-register even when the alias already maps to identical content.

	        Returns:
	            The registry entry (the existing one when nothing changed).

	        Raises:
	            ValueError: neither ``file_path`` nor ``embed_bytes`` was given.
	        """
	        metadata = metadata or {}
	        if embed_bytes is not None:
	            bytes_name = f"{alias}.embedded"
	            dest = self.file_manager.write_bytes_to_global(embed_bytes, bytes_name)
	            sha = self.file_manager.compute_sha256(dest)
	            entry = {
	                "source_path": dest,
	                "type": asset_type,
	                "metadata": metadata,
	                "embedded": True,
	                "bytes_name": bytes_name,
	                "sha256": sha,
	                "registered_at": time.time(),
	            }
	            self.asset_registry[alias] = entry
	            self._persist_registry()
	            return entry

	        if not file_path:
	            raise ValueError("Provide either file_path or embed_bytes")

	        src_abs = os.path.abspath(file_path)
	        dest = self.file_manager.copy_to_global(src_abs, dest_name=os.path.basename(src_abs))
	        sha = self.file_manager.compute_sha256(dest)
	        entry = {
	            "source_path": dest,
	            "type": asset_type,
	            "metadata": metadata,
	            "embedded": False,
	            "sha256": sha,
	            "registered_at": time.time(),
	        }
	        if alias in self.asset_registry and not force:
	            # Same content already registered: keep the existing entry untouched.
	            if self.asset_registry[alias].get("sha256") == sha:
	                return self.asset_registry[alias]
	        self.asset_registry[alias] = entry
	        self._persist_registry()
	        return entry

	    def unregister_asset(self, alias: str):
	        """Remove ``alias`` from the registry; the stored file itself is left on disk."""
	        if alias in self.asset_registry:
	            del self.asset_registry[alias]
	            self._persist_registry()

	    def resolve_asset_path(self, alias: str) -> str:
	        """Return the stored file path for ``alias``.

	        Raises:
	            AssetNotFoundError: the alias is not registered.
	        """
	        entry = self.asset_registry.get(alias)
	        if not entry:
	            raise AssetNotFoundError(alias)
	        return entry["source_path"]

	    # ------------------------------------------------------------------ loader / assets

	    def _ensure_loader_template(self):
	        """Write ``_codebox_loader.py`` into the global folder unless it already exists.

	        The loader is copied into each environment so sandboxed scripts can
	        call ``load_asset(alias)``.
	        """
	        loader_path = os.path.join(self.file_manager.global_dir, "_codebox_loader.py")
	        if os.path.exists(loader_path):
	            return

	        # The template's block indentation was restored here; textwrap.dedent
	        # below strips the common margin before the file is written.
	        loader_code = r'''
	        import os
	        import json

	        # Try optional heavy deps; if missing, fall back to JSON registry and path-only behavior.
	        try:
	            import torch as _torch
	        except Exception:
	            _torch = None

	        try:
	            from safetensors.torch import load_file as _safetensors_load
	        except Exception:
	            _safetensors_load = None

	        def _registry_pt_path(assets_dir):
	            return os.path.join(assets_dir, "assets.pt")

	        def _registry_json_path(assets_dir):
	            return os.path.join(assets_dir, "assets.json")

	        def _load_registry(assets_dir):
	            # Prefer torch payload if torch is available and file exists
	            pt_path = _registry_pt_path(assets_dir)
	            json_path = _registry_json_path(assets_dir)
	            if _torch is not None and os.path.exists(pt_path):
	                try:
	                    payload = _torch.load(pt_path)
	                    if isinstance(payload, dict) and "assets" in payload:
	                        return payload["assets"]
	                    return payload
	                except Exception:
	                    # fall through to json
	                    pass
	            if os.path.exists(json_path):
	                with open(json_path, "r", encoding="utf-8") as f:
	                    return json.load(f)
	            # last resort: try to load pt even without torch (will raise)
	            if os.path.exists(pt_path):
	                raise RuntimeError("Torch not available to read assets.pt; install torch or ensure assets.json exists.")
	            return {}

	        def load_asset(alias):
	            assets_dir = os.environ.get("CODEBOX_ASSETS_DIR")
	            if not assets_dir:
	                raise RuntimeError("CODEBOX_ASSETS_DIR not set")
	            registry = _load_registry(assets_dir)
	            entry = registry.get(alias)
	            if not entry:
	                raise KeyError(f"Asset '{alias}' not found in registry")
	            path = entry["source_path"]
	            typ = entry.get("type", "bin")
	            if typ == "safetensors":
	                if _safetensors_load is None:
	                    return path
	                return _safetensors_load(path)
	            if typ == "pt":
	                if _torch is None:
	                    raise RuntimeError("Torch not available in this environment to load .pt assets.")
	                return _torch.load(path, map_location="cpu")
	            if typ == "json":
	                with open(path, "r", encoding="utf-8") as f:
	                    return json.load(f)
	            return path
	        '''
	        loader_code = textwrap.dedent(loader_code)
	        with open(loader_path, "w", encoding="utf-8") as f:
	            f.write(loader_code)

	    def _sync_required_assets(self, working_dir: str, required_assets: Optional[List[str]] = None):
	        """Publish the registry and asset files into ``working_dir``.

	        The registry is written both as ``assets.pt`` and ``assets.json``
	        (each via temp file + ``os.replace``). Asset files are hard-linked,
	        or copied when linking fails, unless a file of that name already
	        exists. A falsy ``required_assets`` means every registered asset.
	        """
	        registry_dst_pt = os.path.join(working_dir, self.ASSETS_FILENAME)
	        registry_dst_json = os.path.join(working_dir, "assets.json")
	        payload = {"schema_version": self.ASSET_SCHEMA_VERSION, "assets": self.asset_registry}

	        tmp_fd, tmp_path = tempfile.mkstemp(dir=working_dir)
	        os.close(tmp_fd)
	        torch.save(payload, tmp_path)
	        os.replace(tmp_path, registry_dst_pt)

	        tmp_fd, tmp_path = tempfile.mkstemp(dir=working_dir)
	        os.close(tmp_fd)
	        with open(tmp_path, "w", encoding="utf-8") as f:
	            json.dump(self.asset_registry, f)
	        os.replace(tmp_path, registry_dst_json)

	        assets_to_mount = required_assets if required_assets else list(self.asset_registry)
	        for alias in assets_to_mount:
	            entry = self.asset_registry.get(alias)
	            if not entry:
	                continue
	            src = entry["source_path"]
	            target = os.path.join(working_dir, os.path.basename(src))
	            if not os.path.exists(target):
	                # hardlink_or_copy already falls back to a copy when linking fails.
	                self.file_manager.hardlink_or_copy(src, target)

	    def _inject_loader_into_env(self, env_src_dir: str):
	        """Copy the loader template into ``env_src_dir`` if it is not there yet."""
	        src = os.path.join(self.file_manager.global_dir, "_codebox_loader.py")
	        dst = os.path.join(env_src_dir, "_codebox_loader.py")
	        if not os.path.exists(dst):
	            shutil.copy2(src, dst)

	    # ------------------------------------------------------------------ environments

	    def _get_python_bin(self, venv_path: str) -> str:
	        """Return the interpreter path inside ``venv_path`` for the current OS."""
	        if os.name == 'nt':
	            return os.path.join(venv_path, "Scripts", "python.exe")
	        return os.path.join(venv_path, "bin", "python")

	    def create_venv(self, venv_id: str, requirements: Optional[List[str]] = None) -> str:
	        """Create (once per instance) the venv ``venv_id`` and optionally install packages.

	        Returns the venv directory. Raises ``subprocess.CalledProcessError``
	        when venv creation or pip fails.
	        """
	        venv_path = os.path.join(self.envs_dir, venv_id)
	        if venv_id not in self.env_dic:
	            subprocess.run([sys.executable, "-m", "venv", venv_path], check=True)
	            os.makedirs(os.path.join(venv_path, "src"), exist_ok=True)
	            self.env_dic[venv_id] = {"path": venv_path, "packages": []}
	            # inject loader template into src
	            self._inject_loader_into_env(os.path.join(venv_path, "src"))
	        if requirements:
	            self.install_packages(venv_id, requirements)
	        return venv_path

	    def install_packages(self, venv_id: str, packages: List[str]):
	        """pip-install ``packages`` into ``venv_id`` (created on demand) and record them."""
	        if venv_id not in self.env_dic:
	            self.create_venv(venv_id)
	        python_bin = self._get_python_bin(self.env_dic[venv_id]["path"])
	        cmd = [python_bin, "-m", "pip", "install", "--quiet"] + packages
	        subprocess.run(cmd, check=True)
	        recorded = self.env_dic[venv_id]["packages"]
	        for pkg in packages:
	            if pkg not in recorded:
	                recorded.append(pkg)

	    def export_venv(self, venv_id: str) -> str:
	        """Return a JSON manifest (``pip freeze`` output plus metadata) for ``venv_id``.

	        Raises:
	            ValueError: the environment is unknown to this instance.
	        """
	        if venv_id not in self.env_dic:
	            raise ValueError(f"Environment '{venv_id}' does not exist.")
	        python_bin = self._get_python_bin(self.env_dic[venv_id]["path"])
	        res = subprocess.run([python_bin, "-m", "pip", "freeze"], capture_output=True, text=True)
	        manifest = {
	            "venv_id": venv_id,
	            "pip_freeze": res.stdout.splitlines(),
	            "metadata": {k: v for k, v in self.env_dic[venv_id].items() if k != "path"},
	        }
	        return json.dumps(manifest, indent=2)

	    def import_venv(self, manifest_json: str):
	        """Recreate an environment from a manifest produced by ``export_venv``."""
	        manifest = json.loads(manifest_json)
	        venv_id = manifest["venv_id"]
	        self.create_venv(venv_id, requirements=manifest["pip_freeze"])

	    # ------------------------------------------------------------------ execution

	    def _execute_supervised(self, python_bin: str, script_path: str, working_dir: str,
	                            timeout: Optional[int] = 30, max_ram_mb: int = 4096,
	                            required_assets: Optional[List[str]] = None) -> Dict[str, Any]:
	        """Run ``script_path`` in a subprocess with a wall-clock limit and an RSS watchdog.

	        Returns a dict with ``stdout``, ``stderr``, ``success``, ``exit_code``
	        and ``technical_alarms``. ``success`` requires exit code 0 and no
	        "KILL" alarm.
	        """
	        self._sync_required_assets(working_dir, required_assets)
	        env_vars = os.environ.copy()
	        env_vars["PYTHONPATH"] = working_dir
	        env_vars["CODEBOX_ASSETS_DIR"] = working_dir
	        proc = subprocess.Popen(
	            [python_bin, script_path],
	            stdout=subprocess.PIPE,
	            stderr=subprocess.PIPE,
	            cwd=working_dir,
	            env=env_vars,
	            text=True,
	        )

	        alarms: List[str] = []
	        MAX_CPU = 95.0

	        def monitor():
	            # Polls the child until it exits; kills it once RSS exceeds the budget.
	            try:
	                p = psutil.Process(proc.pid)
	                while proc.poll() is None:
	                    mem_mb = p.memory_info().rss / (1024 * 1024)
	                    cpu = p.cpu_percent(interval=0.2)
	                    if mem_mb > max_ram_mb:
	                        alarms.append(f"RESOURCE KILL: Memory usage exceeded ({mem_mb:.1f}MB > {max_ram_mb}MB)")
	                        proc.kill()
	                        break
	                    if cpu > MAX_CPU:
	                        alarms.append(f"RESOURCE WARNING: Sustained CPU spike ({cpu}%)")
	            except psutil.Error:
	                # The process vanished or became inaccessible mid-poll. Catching
	                # the psutil base class keeps AccessDenied from killing the thread.
	                pass

	        mon_thread = threading.Thread(target=monitor, daemon=True)
	        mon_thread.start()

	        try:
	            stdout, stderr = proc.communicate(timeout=timeout)
	        except subprocess.TimeoutExpired:
	            proc.kill()
	            stdout, stderr = proc.communicate()
	            alarms.append(f"RESOURCE KILL: Code execution timed out ({timeout}s limit).")

	        # Give the watchdog a moment to finish its last poll so alarms are complete.
	        mon_thread.join(timeout=1.0)

	        return {
	            "stdout": stdout.strip(),
	            "stderr": stderr.strip(),
	            "success": proc.returncode == 0 and not any("KILL" in a for a in alarms),
	            "exit_code": proc.returncode,
	            "technical_alarms": alarms,
	        }

	    def _discard_temp_venv(self, venv_id: str):
	        """Forget a throw-away venv and delete its directory (best effort)."""
	        self.env_dic.pop(venv_id, None)
	        shutil.rmtree(os.path.join(self.envs_dir, venv_id), ignore_errors=True)

	    def run_code(self, code_block: str, venv_id: str = "default", requirements: Optional[List[str]] = None,
	                 timeout: Optional[int] = 30, max_ram_mb: int = 4096,
	                 required_assets: Optional[List[str]] = None) -> Dict[str, Any]:
	        """Execute ``code_block`` as a script and return the supervised result dict.

	        When ``requirements`` is given the code runs in a throw-away venv that
	        is deleted afterwards, and ``venv_id`` is ignored. Otherwise
	        ``venv_id`` is used and created on demand.
	        """
	        is_temp = bool(requirements)
	        if is_temp:
	            # uuid-based names: second-granularity timestamps collided when
	            # two runs started within the same second.
	            venv_id = f"temp_{uuid.uuid4().hex}"
	        try:
	            if is_temp:
	                self.create_venv(venv_id, requirements)
	            elif venv_id not in self.env_dic:
	                self.create_venv(venv_id)
	            env_meta = self.env_dic[venv_id]
	            src_dir = os.path.join(env_meta["path"], "src")
	            python_bin = self._get_python_bin(env_meta["path"])
	            temp_script = os.path.join(src_dir, f"_run_{uuid.uuid4().hex}.py")
	            try:
	                with open(temp_script, 'w', encoding='utf-8') as f:
	                    f.write(code_block)
	                return self._execute_supervised(
	                    python_bin, temp_script, src_dir, timeout, max_ram_mb, required_assets
	                )
	            finally:
	                if os.path.exists(temp_script):
	                    os.remove(temp_script)
	        finally:
	            # Also runs when venv creation or pip failed, so no temp venv leaks.
	            if is_temp:
	                self._discard_temp_venv(venv_id)

	    def save_script(self, code_id: str, venv_id: str, source_code: str):
	        """Store ``source_code`` as ``<code_id>.py`` in the venv's ``src`` folder and index it."""
	        if venv_id not in self.env_dic:
	            self.create_venv(venv_id)
	        src_dir = os.path.join(self.env_dic[venv_id]["path"], "src")
	        filepath = os.path.join(src_dir, f"{code_id}.py")
	        with open(filepath, 'w', encoding='utf-8') as f:
	            f.write(source_code)
	        self.code_bank[code_id] = {"venv_id": venv_id, "filepath": filepath}
	        self._inject_loader_into_env(src_dir)

	    @staticmethod
	    def _render_argument(value: Any) -> str:
	        """Render one pipeline argument: ``"$name"`` reads ``context``, anything else is ``repr``'d."""
	        if isinstance(value, str) and value.startswith("$"):
	            return f"context[{value[1:]!r}]"
	        return repr(value)

	    def _build_pipeline_script(self, function_map: Dict[str, Any]) -> str:
	        """Generate the wrapper script that runs each step and prints the context as JSON."""
	        lines = ["import json\nimport sys\ncontext = {}\n"]
	        for step, data in function_map.items():
	            c_id, f_name = data["code_id"], data["function"]
	            args = data.get("args", {})
	            pos_args = data.get("pos_args", [])
	            out_var = data.get("output_var", f"out_{step}")
	            lines.append(f"import {c_id}")
	            arg_strs = [self._render_argument(v) for v in pos_args]
	            arg_strs.extend(f"{k}={self._render_argument(v)}" for k, v in args.items())
	            # Context keys are emitted via repr() so quotes in a name cannot
	            # break out of the generated string literal.
	            lines.append(f"try:\n context[{out_var!r}] = {c_id}.{f_name}({', '.join(arg_strs)})")
	            lines.append(f"except Exception as e:\n print(f'Pipeline failed at {step}: {{e}}', file=sys.stderr)\n sys.exit(1)\n")
	        lines.append("print(json.dumps(context))")
	        return "\n".join(lines)

	    @staticmethod
	    def _parse_call_expression(function_call: str):
	        """Parse ``"func(1, key='v')"`` into ``(name, positional_values, keyword_values)``.

	        Only a plain-name callee and literal argument values are accepted.

	        Raises:
	            ValueError: the text is not such a call.
	        """
	        try:
	            tree = ast.parse(function_call)
	            expr = tree.body[0].value
	            if not isinstance(expr, ast.Call):
	                raise ValueError("Target signature is not a valid call statement.")
	            func_name = expr.func.id
	            positional = [ast.literal_eval(node) for node in expr.args]
	            extracted_args = {}
	            for keyword in expr.keywords:
	                if keyword.arg is None:
	                    raise ValueError("'**' argument unpacking is not supported.")
	                extracted_args[keyword.arg] = ast.literal_eval(keyword.value)
	        except Exception as e:
	            raise ValueError(f"AST Function Call Parser failed on expression matching: {e}") from e
	        return func_name, positional, extracted_args

	    def call_function(self, code_id: str = None, function_call: str = None, function_map: Dict[str, Any] = None,
	                      timeout: Optional[int] = 30, max_ram_mb: int = 4096,
	                      required_assets: Optional[List[str]] = None) -> Dict[str, Any]:
	        """Call saved-script functions, either one call or a multi-step pipeline.

	        ``function_map`` maps step names to dicts with ``code_id``,
	        ``function`` and optionally ``args`` (keyword arguments),
	        ``pos_args`` (positional arguments) and ``output_var``. String values
	        starting with ``$`` refer to an earlier step's ``output_var``. All
	        steps run in the venv of the first step's script.

	        Alternatively pass ``code_id`` plus ``function_call`` such as
	        ``"f(1, x=2)"``; its result is stored under ``"result"``. Positional
	        arguments in ``function_call`` used to be silently dropped and are
	        now forwarded.

	        Raises:
	            ValueError: no usable combination of arguments was given, or
	                ``function_call`` could not be parsed.
	        """
	        if function_map:
	            first_step = next(iter(function_map.values()))
	            venv_id = self.code_bank[first_step["code_id"]]["venv_id"]
	            env_meta = self.env_dic[venv_id]
	            src_dir = os.path.join(env_meta["path"], "src")
	            python_bin = self._get_python_bin(env_meta["path"])
	            wrapper_code = self._build_pipeline_script(function_map)
	            # Unique module name so overlapping pipelines do not overwrite each other.
	            wrapper_path = os.path.join(src_dir, f"_dag_runner_{uuid.uuid4().hex}.py")
	            try:
	                with open(wrapper_path, 'w', encoding='utf-8') as f:
	                    f.write(wrapper_code)
	                return self._execute_supervised(
	                    python_bin, wrapper_path, src_dir, timeout, max_ram_mb, required_assets
	                )
	            finally:
	                if os.path.exists(wrapper_path):
	                    os.remove(wrapper_path)

	        if code_id and function_call:
	            func_name, positional, extracted_args = self._parse_call_expression(function_call)
	            macro_map = {
	                "step_1": {
	                    "code_id": code_id,
	                    "function": func_name,
	                    "args": extracted_args,
	                    "pos_args": positional,
	                    "output_var": "result",
	                }
	            }
	            return self.call_function(function_map=macro_map, timeout=timeout,
	                                      max_ram_mb=max_ram_mb, required_assets=required_assets)

	        raise ValueError("Provide either code_id + function_call, or function_map")

	    # ------------------------------------------------------------------ runner cache

	    def get_runner(self, key: str):
	        """Return the cached runner for ``key`` or ``None``."""
	        return self._runner_cache.get(key)

	    def put_runner(self, key: str, runner_obj):
	        """Cache ``runner_obj`` under ``key``."""
	        self._runner_cache.put(key, runner_obj)

	    # ------------------------------------------------------------------ maintenance

	    def prune_cache(self, days_unused: int = 7, logger=None):
	        """Delete files in the global folder whose mtime is older than ``days_unused`` days.

	        The registry file and the loader template are always kept; failures
	        are reported through ``logger`` when one is supplied.

	        NOTE(review): files still referenced by ``asset_registry`` are not
	        protected, so an old-enough registered asset can be deleted while its
	        entry remains. Confirm whether that is intended.
	        """
	        cutoff = time.time() - days_unused * 86400
	        assets_to_keep = {self.ASSETS_FILENAME, "_codebox_loader.py"}
	        try:
	            with os.scandir(self.file_manager.global_dir) as it:
	                for entry in it:
	                    try:
	                        if entry.is_dir():
	                            continue
	                        name = entry.name
	                        if name in assets_to_keep:
	                            continue
	                        mtime = entry.stat().st_mtime
	                        if mtime < cutoff:
	                            try:
	                                os.remove(entry.path)
	                            except Exception as e:
	                                if logger:
	                                    logger.warning("Failed to remove %s: %s", entry.path, e)
	                    except FileNotFoundError:
	                        continue
	                    except PermissionError:
	                        if logger:
	                            logger.warning("Permission denied pruning %s", entry.path)
	                        continue
	        except Exception as e:
	            if logger:
	                logger.error("Prune cache failed for %s: %s", self.file_manager.global_dir, e)

	class Box:
	    """
	    Persistent wrapper around CodeBox.

	    Resolution order for the box file:

	    1. box_location argument
	    2. AppData config location
	    3. Create new CodeBox.pt

	    Mutating calls (register_asset, create_venv, ...) are forwarded to the
	    wrapped CodeBox and followed by a save. Any other attribute is looked up
	    on the wrapped CodeBox.
	    """

	    APP_NAME = "CodeBox"
	    CONFIG_FILE = "box_config.json"
	    DEFAULT_MODEL_FILE = "CodeBox.pt"

	    def __init__(
	        self,
	        box_location: Optional[str] = None,
	        base_dir: Optional[str] = None,
	        runner_cache_capacity: int = 2,
	    ):
	        """Load the box file if it exists, otherwise create a fresh CodeBox and save it."""
	        self._model_path = self._resolve_model_path(box_location)

	        if os.path.exists(self._model_path):
	            self.model = self._load_model(self._model_path)
	        else:
	            self.model = CodeBox(
	                base_dir=base_dir or self._default_storage_dir(),
	                runner_cache_capacity=runner_cache_capacity,
	            )
	            self.save()

	    @staticmethod
	    def _load_model(path):
	        """Unpickle a saved CodeBox from ``path``.

	        SECURITY: this unpickles arbitrary Python objects; only open box
	        files that come from a trusted source.
	        """
	        try:
	            # A whole object is stored, not a tensor state dict, so full
	            # unpickling is required; recent torch releases default to
	            # weights_only=True and would refuse the file.
	            return torch.load(path, weights_only=False)
	        except TypeError:
	            # torch builds that predate the weights_only parameter.
	            return torch.load(path)

	    @classmethod
	    def _appdata_dir(cls):
	        """Return (creating it if needed) the per-user configuration directory."""
	        if os.name == "nt":
	            # APPDATA can be unset in stripped-down environments; joining
	            # None raised TypeError.
	            root = os.getenv("APPDATA") or os.path.expanduser("~")
	        else:
	            root = os.path.expanduser("~/.config")

	        path = os.path.join(root, cls.APP_NAME)
	        os.makedirs(path, exist_ok=True)
	        return path

	    @classmethod
	    def _config_path(cls):
	        """Return the path of the JSON file remembering the last box location."""
	        return os.path.join(cls._appdata_dir(), cls.CONFIG_FILE)

	    @classmethod
	    def _default_storage_dir(cls):
	        """Return (creating it if needed) the default CodeBox storage directory."""
	        path = os.path.join(cls._appdata_dir(), "storage")
	        os.makedirs(path, exist_ok=True)
	        return path

	    @classmethod
	    def _default_model_path(cls):
	        """Return the default location of the box file."""
	        return os.path.join(cls._appdata_dir(), cls.DEFAULT_MODEL_FILE)

	    def _resolve_model_path(self, box_location):
	        """Pick the box file path and remember it in the config file.

	        An explicit ``box_location`` wins; otherwise the configured path is
	        used when that file exists; otherwise the default path.
	        """
	        if box_location:
	            path = os.path.abspath(box_location)
	            self._write_config(path)
	            return path

	        config = self._read_config()
	        if config:
	            saved_path = config.get("box_location")
	            if saved_path and os.path.exists(saved_path):
	                return saved_path

	        path = self._default_model_path()
	        self._write_config(path)
	        return path

	    def _read_config(self):
	        """Return the parsed config dict, or ``{}`` when missing or unreadable."""
	        cfg = self._config_path()

	        if not os.path.exists(cfg):
	            return {}

	        try:
	            with open(cfg, "r", encoding="utf-8") as f:
	                return json.load(f)
	        except Exception:
	            return {}

	    def _write_config(self, model_path):
	        """Record ``model_path`` (made absolute) as the current box location."""
	        cfg = self._config_path()

	        with open(cfg, "w", encoding="utf-8") as f:
	            json.dump(
	                {
	                    "box_location": os.path.abspath(model_path)
	                },
	                f,
	                indent=2,
	            )

	    def save(self):
	        """Pickle the wrapped CodeBox to the current box file."""
	        torch.save(self.model, self._model_path)

	    def save_as(self, path):
	        """Save to ``path`` and make it the current, remembered box location."""
	        path = os.path.abspath(path)
	        torch.save(self.model, path)
	        self._model_path = path
	        self._write_config(path)

	    @classmethod
	    def load(cls, path):
	        """Open (or create) the box stored at ``path``."""
	        return cls(box_location=path)

	    def _delegate_and_save(self, method_name, *args, **kwargs):
	        """Call ``self.model.<method_name>(*args, **kwargs)``, save, and return its result."""
	        result = getattr(self.model, method_name)(*args, **kwargs)
	        self.save()
	        return result

	    # The wrappers below accept *args/**kwargs. Their signatures previously
	    # read ``args, *kwargs`` (asterisks lost), which rejected keyword
	    # arguments such as ``file_path=...``.

	    def register_asset(self, *args, **kwargs):
	        """Forward to ``CodeBox.register_asset`` and save."""
	        return self._delegate_and_save("register_asset", *args, **kwargs)

	    def unregister_asset(self, *args, **kwargs):
	        """Forward to ``CodeBox.unregister_asset`` and save."""
	        return self._delegate_and_save("unregister_asset", *args, **kwargs)

	    def create_venv(self, *args, **kwargs):
	        """Forward to ``CodeBox.create_venv`` and save."""
	        return self._delegate_and_save("create_venv", *args, **kwargs)

	    def install_packages(self, *args, **kwargs):
	        """Forward to ``CodeBox.install_packages`` and save."""
	        return self._delegate_and_save("install_packages", *args, **kwargs)

	    def save_script(self, *args, **kwargs):
	        """Forward to ``CodeBox.save_script`` and save."""
	        return self._delegate_and_save("save_script", *args, **kwargs)

	    def put_runner(self, *args, **kwargs):
	        """Forward to ``CodeBox.put_runner`` and save."""
	        return self._delegate_and_save("put_runner", *args, **kwargs)

	    def __getattr__(self, name):
	        """Fall back to the wrapped CodeBox for attributes not defined on Box."""
	        # __getattr__ only runs after normal lookup fails. If "model" itself is
	        # missing (for example __init__ failed early), delegating through
	        # self.model would recurse until RecursionError.
	        if name == "model":
	            raise AttributeError(name)
	        return getattr(self.model, name)

	    def __contains__(self, alias):
	        """Return True when ``alias`` is a registered asset."""
	        return alias in self.model.asset_registry

	    def __len__(self):
	        """Return the number of registered assets."""
	        return len(self.model.asset_registry)

	    def __repr__(self):
	        return (
	            f"Box("
	            f"assets={len(self.model.asset_registry)}, "
	            f"envs={len(self.model.env_dic)}, "
	            f"path='{self._model_path}')"
	        )

	    def close(self):
	        """Clear the runner cache (best effort) and save."""
	        try:
	            self.model._runner_cache.clear()
	        except Exception:
	            pass
	        self.save()

	    def __del__(self):
	        # Best-effort save on garbage collection; errors are ignored.
	        try:
	            self.close()
	        except Exception:
	            pass

	class TextModelBundle:
	    """Holds the sentence-embedding model, the seq2seq summarizer with its
	    tokenizer, and a spaCy pipeline, all rooted under one model directory.

	    Each model is read from its sub-directory of ``model_dir`` when that
	    directory exists; the embedding model and summarizer are otherwise
	    fetched by name and saved there for next time.
	    """

	    def __init__(self, model_dir: str = MODEL_DIR):
	        self.model_dir = model_dir
	        self.spacy_model_name = SPACY_MODEL_NAME
	        self.embedding_path = os.path.join(model_dir, "all-MiniLM-L6-v2")
	        self.summarizer_path = os.path.join(model_dir, "distilbart-cnn-12-6")
	        self.spacy_model_path = os.path.join(model_dir, "spacy", "en_core_web_sm")

	        # Load order: embeddings, then summarizer, then spaCy.
	        self.embedding_model = self._load_embeddings()
	        summarizer, tokenizer = self._load_summarizer()
	        self.summarizer_model = summarizer
	        self.tokenizer = tokenizer
	        self.nlp = self._load_spacy()

	    def _load_embeddings(self):
	        """Return the SentenceTransformer, saving a local copy on first use."""
	        if os.path.isdir(self.embedding_path):
	            return SentenceTransformer(self.embedding_path)
	        fetched = SentenceTransformer("all-MiniLM-L6-v2")
	        fetched.save(self.embedding_path)
	        return fetched

	    def _load_summarizer(self):
	        """Return ``(model, tokenizer)`` for the summarizer, saving both on first use."""
	        have_local_copy = os.path.isdir(self.summarizer_path)
	        source = self.summarizer_path if have_local_copy else "sshleifer/distilbart-cnn-12-6"
	        model = AutoModelForSeq2SeqLM.from_pretrained(source)
	        tokenizer = AutoTokenizer.from_pretrained(source)
	        if not have_local_copy:
	            model.save_pretrained(self.summarizer_path)
	            tokenizer.save_pretrained(self.summarizer_path)
	        return model, tokenizer

	    def _load_spacy(self):
	        """Return a spaCy pipeline; on any load failure fall back to a blank
	        English pipeline that only has a sentencizer."""
	        try:
	            target = (
	                self.spacy_model_path
	                if os.path.exists(self.spacy_model_path)
	                else self.spacy_model_name
	            )
	            return spacy.load(target)
	        except Exception:
	            fallback = spacy.blank("en")
	            if "sentencizer" not in fallback.pipe_names:
	                fallback.add_pipe("sentencizer")
	            return fallback

	    def embed(self, texts, convert_to_tensor=True):
	        """Encode ``texts`` with the embedding model."""
	        return self.embedding_model.encode(texts, convert_to_tensor=convert_to_tensor)

	    def generate_summary(self, text: str, max_length: int = 300, min_length: int = 50) -> str:
	        """Summarize ``text`` with 4-beam search; empty input gives ``""``.

	        The input is truncated to 1024 tokens by the tokenizer call.
	        """
	        cleaned = _normalize_whitespace(text)
	        if not cleaned:
	            return ""

	        encoded = self.tokenizer(
	            cleaned,
	            return_tensors="pt",
	            truncation=True,
	            max_length=1024,
	        )
	        # Run on whatever device the summarizer weights live on.
	        target_device = next(self.summarizer_model.parameters()).device
	        model_inputs = {name: tensor.to(target_device) for name, tensor in encoded.items()}

	        with torch.no_grad():
	            generated = self.summarizer_model.generate(
	                **model_inputs,
	                max_length=max_length,
	                min_length=min_length,
	                num_beams=4,
	                early_stopping=True,
	            )
	        first_sequence = generated[0]
	        return self.tokenizer.decode(first_sequence, skip_special_tokens=True)

	def chunk_text_by_context(text: str, bundle: TextModelBundle, num_chunks: int = 20) -> List[str]:
	    """Group the sentences of ``text`` into at most ``num_chunks`` chunks by
	    embedding similarity.

	    Sentences are embedded, reduced with PCA and clustered with
	    agglomerative clustering; each cluster's sentences are joined with a
	    space. Chunks are returned in ascending cluster-label order, not in
	    document order.

	    Args:
	        text: Text to split.
	        bundle: Provides ``embed``.
	        num_chunks: Upper bound on the number of chunks; values below 1
	            are treated as 1.

	    Returns:
	        List of chunk strings; empty if no sentences were found.
	    """
	    sentences = split_sentences(text)
	    if not sentences:
	        return []
	    if len(sentences) == 1:
	        return [sentences[0]]

	    embeddings = bundle.embed(sentences)
	    # bundle.embed defaults to returning a torch tensor, which may live on
	    # an accelerator; scikit-learn needs a host-side array.
	    if isinstance(embeddings, torch.Tensor):
	        embeddings = embeddings.detach().cpu().numpy()
	    else:
	        embeddings = np.asarray(embeddings)
	    if embeddings.ndim != 2 or embeddings.shape[0] <= 1:
	        return [" ".join(sentences)]

	    # n_clusters=0 is rejected by the clusterer, so clamp the request.
	    num_chunks = max(1, int(num_chunks))

	    n_samples, n_features = embeddings.shape
	    n_components = max(1, min(num_chunks, n_samples, n_features))
	    reduced = PCA(n_components=n_components).fit_transform(embeddings)

	    k = min(num_chunks, n_samples)
	    labels = AgglomerativeClustering(n_clusters=k).fit_predict(reduced)

	    chunks: Dict[int, List[str]] = {}
	    for sent, lbl in zip(sentences, labels):
	        chunks.setdefault(int(lbl), []).append(sent)

	    return [" ".join(chunks[label]) for label in sorted(chunks)]


	def safe_summarize_iterative(bundle: TextModelBundle, text: str, max_length: int = 500, min_length: int = 300, overlap: int = 100) -> str:
	    """Shrink ``text`` to at most ``max_length`` words, summarizing in pieces.

	    Strategy, in order:
	      1. Text already within ``max_length`` words is returned unchanged.
	      2. Text of at most 1024 tokens is summarized in one call.
	      3. Multi-sentence text is split in half by sentence count, each half
	         is processed recursively and the results are joined.
	      4. Otherwise the token stream is cut into overlapping 1024-token
	         windows, each window is summarized (or kept when already short),
	         and the combined result is processed again if still too long.

	    Any exception from the summarizer is swallowed and the unsummarized
	    text for that piece is used instead.

	    Args:
	        bundle: Provides ``tokenizer`` and ``generate_summary``.
	        text: Input text.
	        max_length: Word threshold; also passed to the summarizer.
	        min_length: Passed to the summarizer.
	        overlap: Tokens shared by consecutive windows. Clamped to
	            ``[0, 1023]`` so every window advances by at least one token.

	    Returns:
	        The (possibly summarized) text; ``""`` for empty input.
	    """
	    window = 1024

	    text = _normalize_whitespace(text)
	    if not text:
	        return ""

	    word_threshold = max_length
	    if len(text.split()) <= word_threshold:
	        return text

	    token_ids = bundle.tokenizer.encode(text, add_special_tokens=False)
	    if len(token_ids) <= window:
	        try:
	            return bundle.generate_summary(text, max_length=max_length, min_length=min_length)
	        except Exception:
	            return text

	    sentences = split_sentences(text)
	    if len(sentences) > 1:
	        mid = len(sentences) // 2
	        left = " ".join(sentences[:mid])
	        right = " ".join(sentences[mid:])
	        a = safe_summarize_iterative(bundle, left, max_length, min_length, overlap)
	        b = safe_summarize_iterative(bundle, right, max_length, min_length, overlap)
	        return _normalize_whitespace(f"{a} {b}")

	    # With overlap >= window the old "start = end - overlap" update never
	    # moved forward and the loop appended chunks forever.
	    stride = window - min(max(int(overlap), 0), window - 1)

	    chunks = []
	    for start in range(0, len(token_ids), stride):
	        end = min(start + window, len(token_ids))
	        chunks.append(
	            bundle.tokenizer.decode(
	                token_ids[start:end],
	                skip_special_tokens=True,
	                clean_up_tokenization_spaces=True,
	            )
	        )
	        if end == len(token_ids):
	            break

	    summaries = []
	    for chunk in chunks:
	        chunk = _normalize_whitespace(chunk)
	        if not chunk:
	            continue
	        if len(chunk.split()) <= word_threshold:
	            summaries.append(chunk)
	            continue
	        try:
	            summaries.append(bundle.generate_summary(chunk, max_length=max_length, min_length=min_length))
	        except Exception:
	            summaries.append(chunk)

	    combined = _normalize_whitespace(" ".join(summaries))
	    if not combined:
	        return text

	    if len(combined.split()) <= word_threshold:
	        return combined
	    # NOTE(review): if every window is already within the word threshold,
	    # `combined` is not shorter than `text` and this call may not converge.
	    return safe_summarize_iterative(bundle, combined, max_length, min_length, overlap)


	def safe_summarize(bundle: TextModelBundle, text: str, max_length: int = 750, min_length: int = 500, overlap: int = 250, depth: int = 0, max_depth: int = 15) -> str:
	    """Recursively shrink ``text`` to at most ``max_length`` words.

	    Text within the word limit is returned unchanged; text of at most 1024
	    tokens is summarized directly. Longer multi-sentence text is halved by
	    sentence count, each half is summarized recursively, and the joined
	    result is summarized again. Text with a single sentence is cut into
	    overlapping 1024-token windows instead.

	    Once ``depth`` exceeds ``max_depth``, or on ``RecursionError``, the work
	    is handed to ``safe_summarize_iterative``. Summarizer exceptions are
	    swallowed and the unsummarized piece is used.

	    Args:
	        bundle: Provides ``tokenizer`` and ``generate_summary``.
	        text: Input text.
	        max_length: Word threshold; also passed to the summarizer.
	        min_length: Passed to the summarizer.
	        overlap: Tokens shared by consecutive windows. Clamped to
	            ``[0, 1023]`` so every window advances by at least one token.
	        depth: Current recursion depth (callers normally leave this at 0).
	        max_depth: Depth beyond which the iterative fallback is used.

	    Returns:
	        The (possibly summarized) text; ``""`` for empty input.
	    """
	    window = 1024

	    text = _normalize_whitespace(text)
	    if not text:
	        return ""

	    if depth > max_depth:
	        return safe_summarize_iterative(bundle, text, max_length=max_length, min_length=min_length, overlap=overlap)

	    if len(text.split()) <= max_length:
	        return text

	    token_ids = bundle.tokenizer.encode(text, add_special_tokens=False)
	    if len(token_ids) <= window:
	        try:
	            return bundle.generate_summary(text, max_length=max_length, min_length=min_length)
	        except Exception:
	            return text

	    sentences = split_sentences(text)
	    if len(sentences) > 1:
	        mid = len(sentences) // 2
	        left = " ".join(sentences[:mid])
	        right = " ".join(sentences[mid:])
	        try:
	            s1 = safe_summarize(bundle, left, max_length, min_length, overlap, depth + 1, max_depth)
	            s2 = safe_summarize(bundle, right, max_length, min_length, overlap, depth + 1, max_depth)
	            return safe_summarize(bundle, f"{s1} {s2}", max_length, min_length, overlap, depth + 1, max_depth)
	        except RecursionError:
	            return safe_summarize_iterative(bundle, text, max_length=max_length, min_length=min_length, overlap=overlap)

	    # With overlap >= window the old "start = end - overlap" update never
	    # moved forward and the loop appended pieces forever.
	    stride = window - min(max(int(overlap), 0), window - 1)

	    pieces = []
	    for start in range(0, len(token_ids), stride):
	        end = min(start + window, len(token_ids))
	        pieces.append(
	            bundle.tokenizer.decode(
	                token_ids[start:end],
	                skip_special_tokens=True,
	                clean_up_tokenization_spaces=True,
	            )
	        )
	        if end == len(token_ids):
	            break

	    chunk_summaries = []
	    for piece in pieces:
	        piece = _normalize_whitespace(piece)
	        if not piece:
	            continue
	        if len(piece.split()) <= max_length:
	            chunk_summaries.append(piece)
	            continue
	        try:
	            chunk_summaries.append(bundle.generate_summary(piece, max_length=max_length, min_length=min_length))
	        except Exception:
	            chunk_summaries.append(piece)

	    combined = _normalize_whitespace(" ".join(chunk_summaries))
	    if not combined:
	        return text
	    if len(combined.split()) <= max_length:
	        return combined
	    return safe_summarize(bundle, combined, max_length, min_length, overlap, depth + 1, max_depth)

	def summarize_relevant_clusters(bundle: TextModelBundle, input_query: str, texts: List[str], similarity_threshold: Optional[float] = None, num_clusters: int = 12) -> List[str]:
	    """Deduplicate ``texts``, cluster them, keep clusters that mention a
	    query keyword, and return a one-element list with the final summary.

	    Steps:
	      * near-duplicates are dropped greedily using cosine similarity
	        against the texts kept so far (threshold defaults to the mean of
	        the pairwise similarities);
	      * the survivors are grouped with KMeans;
	      * a cluster is relevant when any lemma of a noun/verb/adjective/
	        proper noun from the query occurs as a substring of its text; if
	        no keywords were found, or no cluster matches, all clusters are used;
	      * each relevant cluster is summarized, then the joined summaries are
	        summarized once more.

	    Returns ``[]`` when no non-empty text is supplied.
	    """
	    cleaned = [c for c in (_normalize_whitespace(raw) for raw in texts) if c]
	    if not cleaned:
	        return []
	    if len(cleaned) == 1:
	        return [safe_summarize(bundle, cleaned[0])]

	    embeddings = bundle.embed(cleaned, convert_to_tensor=True)
	    sim_matrix = util.pytorch_cos_sim(embeddings, embeddings)

	    if similarity_threshold is None:
	        similarity_threshold = 0.85
	        if sim_matrix.size(0) > 1:
	            # Strict upper triangle = every unordered pair exactly once.
	            upper = torch.triu_indices(sim_matrix.size(0), sim_matrix.size(1), offset=1)
	            pair_sims = sim_matrix[upper[0], upper[1]]
	            if pair_sims.numel() > 0:
	                similarity_threshold = pair_sims.mean().item()

	    kept: List[int] = []
	    for candidate in range(len(cleaned)):
	        too_similar = False
	        for earlier in kept:
	            if float(sim_matrix[candidate][earlier].item()) > similarity_threshold:
	                too_similar = True
	                break
	        if not too_similar:
	            kept.append(candidate)

	    dedup_texts = [cleaned[i] for i in kept]
	    if not dedup_texts:
	        dedup_texts = cleaned[:]
	    dedup_embeddings = embeddings[kept] if kept else embeddings

	    n_clusters = max(1, min(num_clusters, len(dedup_texts)))

	    if len(dedup_texts) == 1:
	        return [safe_summarize(bundle, dedup_texts[0])]

	    model = KMeans(n_clusters=n_clusters, random_state=0, n_init="auto")
	    labels = model.fit_predict(dedup_embeddings.cpu().numpy())

	    clusters: Dict[int, List[str]] = {}
	    for member, label in zip(dedup_texts, labels):
	        clusters.setdefault(int(label), []).append(member)

	    wanted_pos = ("NOUN", "VERB", "ADJ", "PROPN")
	    try:
	        keywords = {
	            token.lemma_.lower()
	            for token in bundle.nlp(input_query)
	            if getattr(token, "pos_", "") in wanted_pos
	        }
	    except Exception:
	        keywords = set()

	    def is_cluster_relevant(cluster_texts: List[str]) -> bool:
	        # No keywords means nothing to filter on: accept everything.
	        if not keywords:
	            return True
	        haystack = " ".join(cluster_texts).lower()
	        for keyword in keywords:
	            if keyword in haystack:
	                return True
	        return False

	    relevant_clusters = [group for group in clusters.values() if is_cluster_relevant(group)]
	    if not relevant_clusters:
	        relevant_clusters = list(clusters.values())

	    cluster_summaries = []
	    for group in relevant_clusters:
	        merged = _normalize_whitespace(" ".join(group))
	        if not merged:
	            continue
	        cluster_summaries.append(safe_summarize(bundle, merged))

	    if not cluster_summaries:
	        return [safe_summarize(bundle, " ".join(dedup_texts))]

	    final_text = _normalize_whitespace(" ".join(cluster_summaries))
	    return [safe_summarize(bundle, final_text)]


	@dataclass
	class PageCandidate:
	    """A single search hit that may be fetched and read."""

	    # Result title as shown by the search engine.
	    title: str
	    # Link target as returned by the search engine.
	    url: str
	    # Short description shown with the hit; empty when none was found.
	    snippet: str = field(default="")
	    # Numeric score slot; 0.0 unless a caller assigns one.
	    rank: float = field(default=0.0)


	@dataclass
	class CrawlResult:
	    """Outcome of one crawl-and-summarize run for a query."""

	    # The query this result answers.
	    query: str
	    # Final answer text.
	    answer: str = field(default="")
	    # Texts extracted from the pages that were read successfully.
	    partial_texts: List[str] = field(default_factory=list)
	    # URLs that yielded usable text.
	    used_candidates: List[str] = field(default_factory=list)
	    # URLs that were tried but yielded no usable text.
	    failed_candidates: List[str] = field(default_factory=list)
	    # True when the answer was built from something other than page text.
	    fallback_used: bool = field(default=False)
	    # Wall-clock duration of the run, in seconds.
	    elapsed_seconds: float = field(default=0.0)
	    # Error description when the run raised; otherwise None.
	    error: Optional[str] = field(default=None)

	class CrawlWorker:
	    """Runs a web query end to end: search, fetch pages, extract text, summarize.

	    Owns a ``TextModelBundle``, a thread pool used to bound per-page fetch
	    time, and a ``requests`` session with browser-like headers.
	    """

	    # Elements removed before the BeautifulSoup fallbacks read page text.
	    _NOISE_TAGS = [
	        "script", "style", "noscript", "header", "footer", "nav", "aside",
	        "form", "input", "button", "svg", "canvas", "iframe", "object",
	        "embed", "img", "video", "audio",
	    ]

	    def __init__(self):
	        self.bundle = TextModelBundle()
	        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)
	        self.session = requests.Session()
	        self.session.headers.update({
	            "User-Agent": (
	                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
	                "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
	            ),
	            # The catch-all media range is "*/*"; a bare "/" is not a valid range.
	            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
	            "Accept-Language": "en-US,en;q=0.5",
	        })

	    def close(self):
	        """Shut down the thread pool and the HTTP session, ignoring errors."""
	        try:
	            self.executor.shutdown(wait=False, cancel_futures=True)
	        except Exception:
	            pass
	        try:
	            self.session.close()
	        except Exception:
	            pass

	    def run_query(self, query: str, stats: Optional[dict] = None) -> "CrawlResult":
	        """Answer ``query`` from the web and return a ``CrawlResult``.

	        Candidates are fetched in ranked order until five pages have
	        produced at least 80 characters of text. The answer is, in order
	        of preference: a summary of the fetched texts; the snippets of the
	        pages that failed; the titles and snippets of the top five
	        candidates. Exceptions are captured in ``result.error`` rather than
	        raised.

	        Args:
	            query: The search query.
	            stats: Optional dict; a per-run record is appended to
	                ``stats["runs"]``.
	        """
	        t0 = time.perf_counter()
	        result = CrawlResult(query=query)

	        try:
	            candidates = self.build_candidates(query)
	            if not candidates:
	                result.answer = f"No results found for: {query}"
	                result.fallback_used = True
	                return result

	            target_successes = 5
	            successful_texts: List[str] = []
	            fallback_snippets: List[str] = []

	            # One pass is sufficient: it stops early only when the target
	            # is reached, so a second pass over "the remaining candidates"
	            # would always find none.
	            for cand in candidates:
	                if len(successful_texts) >= target_successes:
	                    break

	                text = self.fetch_and_extract(cand.url, page_timeout=20)
	                if text and len(text.strip()) >= 80:
	                    successful_texts.append(_normalize_whitespace(text))
	                    result.used_candidates.append(cand.url)
	                else:
	                    result.failed_candidates.append(cand.url)
	                    if cand.snippet:
	                        fallback_snippets.append(f"{cand.title}. {cand.snippet}".strip())

	            if successful_texts:
	                merged = " ".join(successful_texts)
	                chunks = chunk_text_by_context(merged, self.bundle, num_chunks=min(8, max(2, len(successful_texts))))
	                summary_list = summarize_relevant_clusters(self.bundle, query, chunks, similarity_threshold=None, num_clusters=min(8, len(chunks)))
	                answer = _normalize_whitespace(" ".join(summary_list))
	                result.answer = answer if answer else _normalize_whitespace(merged)
	                result.partial_texts = successful_texts
	                return result

	            snippet_text = _normalize_whitespace(" ".join(fallback_snippets))
	            if snippet_text:
	                result.answer = snippet_text
	                result.fallback_used = True
	                return result

	            ranked_text = _normalize_whitespace(" ".join(f"{c.title}. {c.snippet}".strip() for c in candidates[:5]))
	            result.answer = ranked_text if ranked_text else f"No usable text found for: {query}"
	            result.fallback_used = True
	            return result
	        except Exception as e:
	            result.error = f"{e}\n{traceback.format_exc()}"
	            result.answer = result.answer or f"[ERROR] {e}"
	            return result
	        finally:
	            # Runs on every exit path, so timing and stats are always recorded.
	            result.elapsed_seconds = time.perf_counter() - t0
	            if stats is not None:
	                stats.setdefault("runs", []).append({
	                    "query": query,
	                    "seconds": round(result.elapsed_seconds, 3),
	                    "used_pages": len(result.used_candidates),
	                    "failed_pages": len(result.failed_candidates),
	                    "fallback_used": result.fallback_used,
	                    "error": result.error,
	                })

	    def build_candidates(self, query: str, num_results: int = 15) -> List["PageCandidate"]:
	        """Collect hits from all search engines, drop duplicate URLs and
	        return up to 15 candidates ordered by embedding similarity between
	        the query and each hit's title plus snippet.

	        ``num_results`` is the number of hits requested from each engine.
	        """
	        raw = []
	        raw.extend(self.duckduckgo_search(query, num_results=num_results))
	        raw.extend(self.resulthunter_search(query, num_results=num_results))
	        raw.extend(self.google_search(query, num_results=num_results))

	        seen = set()
	        scored = []
	        for title, url, snippet in raw:
	            norm = self.normalize_url(url)
	            if not norm or norm in seen:
	                continue
	            seen.add(norm)
	            scored.append(PageCandidate(title=title or norm, url=url, snippet=snippet or ""))
	        if not scored:
	            return []

	        titles_and_snippets = [f"{c.title} {c.snippet}".strip() for c in scored]
	        query_emb = self.bundle.embed([query], convert_to_tensor=True)[0]
	        page_embs = self.bundle.embed(titles_and_snippets, convert_to_tensor=True)
	        sim_scores = util.cos_sim(query_emb, page_embs)[0]
	        order = sim_scores.argsort(descending=True).tolist()
	        return [scored[i] for i in order][:15]

	    def fetch_and_extract(self, url: str, page_timeout: int = 20) -> str:
	        """Fetch ``url`` on the thread pool and return its text.

	        Returns ``""`` on an empty URL, on timeout, or on any error.
	        """
	        url = self.normalize_url(url)
	        if not url:
	            return ""
	        future = self.executor.submit(self._fetch_and_extract_sync, url)
	        try:
	            return future.result(timeout=page_timeout) or ""
	        except concurrent.futures.TimeoutError:
	            # Only helps if the task has not started; a running fetch
	            # keeps going in its thread.
	            future.cancel()
	            return ""
	        except Exception:
	            return ""

	    def _fetch_and_extract_sync(self, url: str) -> str:
	        """Download ``url`` and extract text (PDF or HTML); ``""`` on HTTP failure."""
	        try:
	            r = self.session.get(url, timeout=(8, 15), allow_redirects=True)
	            r.raise_for_status()
	        except Exception:
	            return ""

	        content_type = r.headers.get("Content-Type", "").lower()
	        if "application/pdf" in content_type or url.lower().endswith(".pdf"):
	            return _normalize_whitespace(self._extract_pdf_bytes(r.content))

	        html = r.text or ""
	        text = self.universal_page_parser(url, html, response=r, use_browser=False)
	        return _normalize_whitespace(text)

	    def _extract_pdf_bytes(self, pdf_bytes: bytes) -> str:
	        """Return the text of all pages of a PDF given as bytes; ``""`` on failure.

	        The bytes are written to a temporary file that is removed on every
	        exit path, including when the PDF cannot be opened or parsed.
	        """
	        pdf_path = None
	        try:
	            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
	                tmp.write(pdf_bytes)
	                pdf_path = tmp.name
	            doc = fitz.open(pdf_path)
	            try:
	                return " ".join(page.get_text() for page in doc)
	            finally:
	                doc.close()
	        except Exception:
	            return ""
	        finally:
	            if pdf_path is not None:
	                try:
	                    os.remove(pdf_path)
	                except Exception:
	                    pass

	    def universal_page_parser(self, url: str, html: str, response=None, use_browser: bool = False) -> str:
	        """Extract readable text from ``html`` by trying extractors in turn.

	        Each extractor is skipped when its module-level name is ``None``
	        and ignored when it raises; the first one that yields more than 80
	        characters wins (50 for the last, whole-page fallback). ``response``
	        and ``use_browser`` are accepted but not used here.
	        """
	        text = ""
	        if YouTubeTranscriptApi is not None and ("youtu" in url.lower()):
	            video_id = self._extract_youtube_id(url)
	            if video_id:
	                try:
	                    transcript = YouTubeTranscriptApi.get_transcript(video_id)
	                    text = " ".join(entry["text"] for entry in transcript).strip()
	                    if text:
	                        return text
	                except Exception:
	                    pass
	        if trafilatura is not None:
	            try:
	                t = trafilatura.extract(html, include_comments=False, include_tables=True)
	                if t and len(t.strip()) > 80:
	                    return t.strip()
	            except Exception:
	                pass
	        if extractors is not None:
	            try:
	                boilerpy_text = extractors.ArticleExtractor().get_content(html)
	                if boilerpy_text and len(boilerpy_text.strip()) > 80:
	                    return boilerpy_text.strip()
	            except Exception:
	                pass
	        if Document is not None:
	            try:
	                doc = Document(html)
	                soup = BeautifulSoup(doc.summary(), "html.parser")
	                text_readability = soup.get_text(" ", strip=True)
	                if text_readability and len(text_readability.strip()) > 80:
	                    return text_readability.strip()
	            except Exception:
	                pass
	        if Article is not None:
	            try:
	                article = Article(url)
	                article.set_html(html)
	                article.parse()
	                text_newspaper = article.text or ""
	                if text_newspaper and len(text_newspaper.strip()) > 80:
	                    return text_newspaper.strip()
	            except Exception:
	                pass
	        if Goose is not None:
	            try:
	                goose_text = Goose().extract(raw_html=html).cleaned_text
	                if goose_text and len(goose_text.strip()) > 80:
	                    return goose_text.strip()
	            except Exception:
	                pass
	        if inscriptis_text is not None:
	            try:
	                inscriptis_parsed = inscriptis_text(html)
	                if inscriptis_parsed and len(inscriptis_parsed.strip()) > 80:
	                    return inscriptis_parsed.strip()
	            except Exception:
	                pass
	        if lxml_html is not None:
	            try:
	                lxml_tree = lxml_html.fromstring(html)
	                lxml_text = " ".join(lxml_tree.xpath("//p//text()"))
	                if lxml_text and len(lxml_text.strip()) > 80:
	                    return lxml_text.strip()
	            except Exception:
	                pass
	        # Fallback 1: text of common content tags after removing noise elements.
	        try:
	            soup = BeautifulSoup(html, "html.parser")
	            for tag in soup(self._NOISE_TAGS):
	                tag.decompose()
	            tags = ["p", "li", "span", "div", "h1", "h2", "h3", "h4", "h5", "h6"]
	            bs_text = " ".join(t.get_text(" ", strip=True) for tag in tags for t in soup.find_all(tag))
	            if bs_text and len(bs_text.strip()) > 80:
	                return bs_text.strip()
	        except Exception:
	            pass

	        # Fallback 2: all remaining visible text, with a lower length bar.
	        try:
	            soup = BeautifulSoup(html, "html.parser")
	            for tag in soup(self._NOISE_TAGS):
	                tag.decompose()
	            visible_text = soup.get_text(" ", strip=True)
	            if visible_text and len(visible_text.strip()) > 50:
	                return visible_text.strip()
	        except Exception:
	            pass

	        return ""

	    def _extract_youtube_id(self, url: str) -> Optional[str]:
	        """Return the 11-character video id found in ``url``, or ``None``."""
	        patterns = [
	            r"(?:v=)([A-Za-z0-9_-]{11})",
	            r"youtu\.be/([A-Za-z0-9_-]{11})",
	            r"youtube\.com/shorts/([A-Za-z0-9_-]{11})",
	            r"youtube\.com/embed/([A-Za-z0-9_-]{11})",
	        ]
	        for pattern in patterns:
	            m = re.search(pattern, url)
	            if m:
	                return m.group(1)
	        return None

	    def normalize_url(self, url: str) -> str:
	        """Unwrap search-engine redirect links and make sure a scheme is present.

	        Returns ``""`` for an empty URL. Anything not starting with
	        ``http`` is prefixed with ``https://``.
	        """
	        if not url:
	            return ""

	        if "resulthunter.com" in url:
	            qs = parse_qs(urlparse(url).query)
	            if "url" in qs:
	                return unquote(qs["url"][0])

	        if url.startswith("/videos/watch/"):
	            parsed = urlparse(url)
	            path_parts = parsed.path.split("/")
	            if len(path_parts) >= 4:
	                video_id = path_parts[3]
	                if len(video_id) >= 11:
	                    return f"https://www.youtube.com/watch?v={video_id}"

	        if "duckduckgo.com/l/" in url:
	            qs = parse_qs(urlparse(url).query)
	            if "uddg" in qs and qs["uddg"]:
	                return unquote(qs["uddg"][0])

	        return url if url.startswith("http") else f"https://{url.lstrip('/')}"

	    def is_ad_link(self, url: str) -> bool:
	        """Return True when ``url`` contains one of a few ad-related substrings."""
	        ad_keywords = ["advert", "ads", "doubleclick", "sponsor", "promo"]
	        return any(term in (url or "").lower() for term in ad_keywords)

	    # ---------------- search engines ----------------
	    def duckduckgo_search(self, query: str, num_results: int = 10):
	        """Return up to ``num_results`` ``(title, href, snippet)`` tuples from
	        DuckDuckGo's HTML endpoint; ``[]`` on a request error."""
	        url = "https://html.duckduckgo.com/html/"
	        data = {"q": query}
	        try:
	            r = self.session.post(url, data=data, timeout=(8, 20))
	            r.raise_for_status()
	        except requests.RequestException:
	            return []

	        soup = BeautifulSoup(r.text, "html.parser")
	        results = []
	        for result in soup.select("div.result"):
	            title_a = result.select_one("a.result__url") or result.select_one("a.result__a")
	            snippet_a = result.select_one("a.result__snippet") or result.select_one("div.result__snippet")
	            if not title_a:
	                continue
	            title = title_a.get_text(strip=True)
	            href = title_a.get("href", "")
	            if href.startswith("//duckduckgo.com/l/?uddg="):
	                href = unquote(href.split("uddg=")[1].split("&")[0])
	            snippet = snippet_a.get_text(strip=True) if snippet_a else ""
	            results.append((title, href, snippet))
	            if len(results) >= num_results:
	                break
	        return results

	    def resulthunter_search(self, query: str, num_results: int = 10):
	        """Return up to ``num_results`` ``(title, link, snippet)`` tuples from
	        resulthunter.com; ``[]`` on a request error."""
	        encoded_query = quote(query)
	        url = f"https://www.resulthunter.com/search?q={encoded_query}"
	        try:
	            r = self.session.get(url, timeout=(8, 15))
	            r.raise_for_status()
	        except requests.RequestException:
	            return []

	        soup = BeautifulSoup(r.text, "html.parser")
	        results = []
	        result_divs = soup.find_all("div", class_="web-result")
	        if not result_divs:
	            # Looser match in case the expected class name is absent.
	            result_divs = soup.find_all("div", class_=lambda c: c and "result" in c.lower())

	        for result in result_divs:
	            link_tag = result.find("a", href=True)
	            if not link_tag:
	                continue
	            title = link_tag.get_text(strip=True)
	            link = link_tag["href"]
	            if not link.startswith("http"):
	                continue
	            snippet_tag = result.find("p", class_="web-result-desc") or result.find("p")
	            snippet = snippet_tag.get_text(strip=True) if snippet_tag else ""
	            results.append((title, link, snippet))
	            if len(results) >= num_results:
	                break
	        return results

	    def google_search(self, query: str, num_results: int = 10):
	        """Return up to ``num_results`` ``(title, href, snippet)`` tuples
	        scraped from a Google results page; ``[]`` on a request error."""
	        encoded_query = quote(query)
	        url = f"https://www.google.com/search?q={encoded_query}&num={num_results + 5}"
	        try:
	            r = self.session.get(url, timeout=(8, 15))
	            r.raise_for_status()
	        except requests.RequestException:
	            return []

	        soup = BeautifulSoup(r.text, "html.parser")
	        results = []
	        for g in soup.select("div.g"):
	            title_el = g.select_one("h3")
	            link_el = g.select_one("a[href]")
	            if not (title_el and link_el):
	                continue
	            title = title_el.get_text(strip=True)
	            href = link_el["href"]
	            snippet_el = g.select_one("div.VwiC3b")
	            if snippet_el:
	                snippet = snippet_el.get_text(strip=True)
	            else:
	                # No snippet element: use the block's text minus the title.
	                snippet = g.get_text(separator=" ", strip=True).replace(title, "", 1).strip()
	            results.append((title, href, snippet))
	            if len(results) >= num_results:
	                break
	        return results

	def _worker_main(command_queue: Queue, response_queue: Queue):
	    """Entry point of the crawl worker process.

	    Builds one ``CrawlWorker`` and serves messages from ``command_queue``
	    until a ``{"type": "shutdown"}`` message arrives:

	    * ``"query"``: runs the query and puts a ``"result"`` message carrying
	      the same ``request_id`` on ``response_queue``;
	    * ``"shutdown"``: puts ``{"type": "shutdown_ack"}`` and exits the loop;
	    * empty messages and other types are ignored.

	    The worker is closed on the way out, however the loop ends.
	    """
	    crawler = CrawlWorker()
	    try:
	        while True:
	            message = command_queue.get()
	            if not message:
	                continue

	            kind = message.get("type")
	            if kind == "shutdown":
	                response_queue.put({"type": "shutdown_ack"})
	                break
	            if kind == "query":
	                request_id = message["request_id"]
	                outcome = crawler.run_query(message["query"], stats=message.get("stats"))
	                reply = {
	                    "type": "result",
	                    "request_id": request_id,
	                    "result": outcome,
	                }
	                response_queue.put(reply)
	    finally:
	        crawler.close()

	class WebSearchModule(nn.Module):
	    """``nn.Module`` front end that answers queries through a worker process.

	    The module keeps its own ``TextModelBundle`` for ``embed`` and
	    ``summarize``. Web queries are sent to a separately spawned process
	    running ``_worker_main`` over a pair of multiprocessing queues. The
	    process handles are dropped when the module is pickled and recreated
	    lazily on the next query.
	    """

	    def __init__(self, model_dir: str = MODEL_DIR):
	        super().__init__()
	        self.model_dir = model_dir
	        self.bundle = TextModelBundle(model_dir=model_dir)

	        self._worker = None
	        self._command_queue = None
	        self._response_queue = None
	        self._worker_ctx = None
	        self._worker_started = False

	    def __getstate__(self):
	        """Return the instance dict with all process/queue handles blanked."""
	        state = self.__dict__.copy()
	        state["_worker"] = None
	        state["_command_queue"] = None
	        state["_response_queue"] = None
	        state["_worker_ctx"] = None
	        state["_worker_started"] = False
	        return state

	    def __setstate__(self, state):
	        """Restore the instance dict; the worker is never restored, only re-created."""
	        self.__dict__.update(state)
	        self._worker = None
	        self._command_queue = None
	        self._response_queue = None
	        self._worker_ctx = None
	        self._worker_started = False

	    def _ensure_worker(self):
	        """Start the worker process and fresh queues unless one is already alive."""
	        if self._worker is not None and self._worker.is_alive():
	            return

	        self._worker_ctx = get_context("spawn")
	        self._command_queue = self._worker_ctx.Queue()
	        self._response_queue = self._worker_ctx.Queue()
	        self._worker = self._worker_ctx.Process(
	            target=_worker_main,
	            args=(self._command_queue, self._response_queue),
	            daemon=True,
	        )
	        self._worker.start()
	        self._worker_started = True

	    def close(self):
	        """Ask the worker to shut down, terminate it if it does not exit in
	        time, then close the queues and clear every handle."""
	        if getattr(self, "_worker", None) is None:
	            return

	        try:
	            if self._worker.is_alive() and self._command_queue is not None:
	                self._command_queue.put({"type": "shutdown"})
	                try:
	                    if self._response_queue is not None:
	                        # Wait for a reply (normally the shutdown ack).
	                        self._response_queue.get(timeout=10)
	                except Exception:
	                    pass
	            self._worker.join(timeout=10)
	            if self._worker.is_alive():
	                self._worker.terminate()
	                self._worker.join(timeout=5)
	        finally:
	            try:
	                if self._command_queue is not None:
	                    self._command_queue.close()
	            except Exception:
	                pass
	            try:
	                if self._response_queue is not None:
	                    self._response_queue.close()
	            except Exception:
	                pass
	            self._worker = None
	            self._command_queue = None
	            self._response_queue = None
	            self._worker_ctx = None
	            self._worker_started = False

	    def __del__(self):
	        # Finalizers must never raise.
	        try:
	            self.close()
	        except Exception:
	            pass

	    def embed(self, texts, convert_to_tensor=True):
	        """Encode ``texts`` with the in-process bundle."""
	        return self.bundle.embed(texts, convert_to_tensor=convert_to_tensor)

	    def summarize(self, text: str, max_length: int = 300, min_length: int = 50):
	        """Summarize ``text`` with the in-process bundle."""
	        return self.bundle.generate_summary(text, max_length=max_length, min_length=min_length)

	    def forward(self, query: str, stats: Optional[dict] = None, timeout: Optional[float] = None) -> str:
	        """Send ``query`` to the worker and wait for its answer.

	        Args:
	            query: The search query.
	            stats: Optional dict whose counters and ``"runs"`` list are
	                updated in this process. It is also sent to the worker, so
	                it must be picklable.
	            timeout: Seconds to wait; ``None`` waits until the worker
	                answers or is found dead.

	        Returns:
	            The answer text; a "still running" notice on timeout; or an
	            ``[ERROR]`` string if the worker process exited without
	            answering.
	        """
	        self._ensure_worker()

	        req_id = str(uuid.uuid4())
	        if stats is not None:
	            stats.setdefault("spiders_created", 0)
	            stats.setdefault("spiders_completed", 0)
	            stats.setdefault("spiders_killed", 0)
	            stats.setdefault("total_seconds", 0.0)
	            stats.setdefault("runs", [])
	            stats["spiders_created"] += 1

	        self._command_queue.put({
	            "type": "query",
	            "request_id": req_id,
	            "query": query,
	            "stats": stats,
	        })

	        started = time.perf_counter()
	        dead_polls = 0

	        while True:
	            if timeout is not None and (time.perf_counter() - started) > timeout:
	                return f"Query is still running in the worker process for: {query}"

	            try:
	                msg = self._response_queue.get(timeout=0.25)
	            except Exception:
	                # queue.Empty or a transport error. A dead worker will
	                # never answer: allow one extra poll to drain anything it
	                # wrote before exiting, then give up instead of spinning
	                # forever.
	                if not self._worker.is_alive():
	                    dead_polls += 1
	                    if dead_polls > 1:
	                        return f"[ERROR] Search worker exited before answering: {query}"
	                continue

	            # Skip acks and results that belong to earlier, timed-out requests.
	            if msg.get("type") != "result" or msg.get("request_id") != req_id:
	                continue

	            result: CrawlResult = msg["result"]
	            if stats is not None:
	                stats["spiders_completed"] += 1
	                stats["total_seconds"] += result.elapsed_seconds
	                stats["runs"].append({
	                    "query": result.query,
	                    "seconds": round(result.elapsed_seconds, 3),
	                    "used_pages": len(result.used_candidates),
	                    "failed_pages": len(result.failed_candidates),
	                    "fallback_used": result.fallback_used,
	                    "error": result.error,
	                })
	            return result.answer or f"No content extracted for query: {query}"

	    def __call__(self, query: str, **kwargs):
	        """Call ``forward`` directly."""
	        return self.forward(query, **kwargs)

	class Web:
	    """
	    High-level wrapper around WebSearchModule.

	    Features:
	    - Auto-locates WebSearch.pt in AppData
	    - Creates one automatically if missing
	    - Exposes all WebSearchModule methods
	    - Supports save/load/reload
	    - Supports direct querying through __call__
	    """

	    DEFAULT_FOLDER = os.path.join(
	        os.getenv("APPDATA", os.path.expanduser("~")),
	        "PackedLLM"
	    )

	    DEFAULT_WEB_PATH = os.path.join(
	        DEFAULT_FOLDER,
	        "WebSearch.pt"
	    )

	    def __init__(
	        self,
	        web_location: Optional[str] = None,
	        model_dir: str = "models",
	        auto_create: bool = True,
	    ):
	        """Load the checkpoint at ``web_location`` (or the default path), or
	        create and save a new module when it is missing.

	        Raises:
	            FileNotFoundError: the checkpoint is missing and
	                ``auto_create`` is false.
	            TypeError: the checkpoint does not hold a ``WebSearchModule``.
	        """
	        self.model_dir = model_dir

	        if web_location:
	            self.web_path = os.path.abspath(web_location)
	        else:
	            self.web_path = self.DEFAULT_WEB_PATH

	        os.makedirs(os.path.dirname(self.web_path), exist_ok=True)

	        if os.path.exists(self.web_path):
	            self.web = self._load(self.web_path)

	        elif auto_create:
	            self.web = WebSearchModule(model_dir=model_dir)
	            self.save(self.web_path)

	        else:
	            raise FileNotFoundError(
	                f"WebSearch checkpoint not found: {self.web_path}"
	            )

	    def _load(self, path: str) -> "WebSearchModule":
	        """Unpickle the module stored at ``path`` onto the CPU.

	        Raises:
	            TypeError: the file does not contain a ``WebSearchModule``.
	        """
	        # weights_only=False unpickles arbitrary objects and can execute
	        # code; only open checkpoints from a trusted source.
	        obj = torch.load(path, map_location="cpu", weights_only=False)

	        if not isinstance(obj, WebSearchModule):
	            raise TypeError(
	                f"{path} does not contain a WebSearchModule."
	            )

	        return obj

	    def save(self, path: Optional[str] = None):
	        """Save the module to ``path`` (default: current path) and adopt it."""
	        target = path or self.web_path

	        os.makedirs(
	            os.path.dirname(os.path.abspath(target)),
	            exist_ok=True,
	        )

	        torch.save(self.web, target)
	        self.web_path = target

	    def reload(self):
	        """Stop the current worker and re-read the module from disk."""
	        self.close()
	        self.web = self._load(self.web_path)

	    def search(self, query: str, **kwargs):
	        """Run ``query`` through the wrapped module's ``forward``."""
	        return self.web.forward(query, **kwargs)

	    def embed(self, texts, convert_to_tensor=True):
	        """Encode ``texts`` with the wrapped module."""
	        return self.web.embed(
	            texts,
	            convert_to_tensor=convert_to_tensor,
	        )

	    def summarize(
	        self,
	        text: str,
	        max_length: int = 300,
	        min_length: int = 50,
	    ):
	        """Summarize ``text`` with the wrapped module."""
	        return self.web.summarize(
	            text,
	            max_length=max_length,
	            min_length=min_length,
	        )

	    def close(self):
	        """Close the wrapped module, ignoring any error."""
	        try:
	            self.web.close()
	        except Exception:
	            pass

	    def __call__(self, query: str, **kwargs):
	        return self.web(query, **kwargs)

	    def __getattr__(self, item):
	        """Delegate unknown attributes to the wrapped module.

	        Raises:
	            AttributeError: if ``web`` has not been assigned yet (failed
	                ``__init__``, unpickling) or the module lacks the attribute.
	        """
	        # Read the instance dict directly: going through ``self.web`` here
	        # would re-enter __getattr__ and recurse without bound.
	        try:
	            web = self.__dict__["web"]
	        except KeyError:
	            raise AttributeError(
	                f"{type(self).__name__!r} object has no attribute {item!r}"
	            ) from None
	        return getattr(web, item)

	    @property
	    def location(self):
	        """Current checkpoint path."""
	        return self.web_path

	    @property
	    def exists(self):
	        """Whether the checkpoint file is present on disk."""
	        return os.path.exists(self.web_path)

	    def info(self):
	        """Return a small status dict (path, existence, worker state, model dir)."""
	        return {
	            "web_path": self.web_path,
	            "exists": self.exists,
	            "worker_running": (
	                self.web._worker is not None
	                and self.web._worker.is_alive()
	            ),
	            "model_dir": self.model_dir,
	        }

	    def __repr__(self):
	        return (
	            f"Web("
	            f"path='{self.web_path}', "
	            f"exists={self.exists}"
	            f")"
	        )

	# File names tried, in order, when looking for a model's main weight file.
	# The trailing comma matters: without it the parentheses are just grouping
	# and the value is a plain string, which iterates character by character.
	PRIMARY_WEIGHT_FILES = ("pytorch_model.bin",)
	# Alternative-format model files listed for skipping.
	SKIP_BLOATED_FILES = {
	    "model.onnx",
	    "onnx_model.onnx",
	    "openvino_model.bin",
	}

	@staticmethod
	def _bytes_to_uint8_tensor(data: bytes) -> torch.Tensor:
	arr = np.frombuffer(data, dtype=np.uint8)
	return torch.from_numpy(arr.copy())

	@staticmethod
	def _uint8_tensor_to_bytes(t: Union[torch.Tensor, bytes]) -> bytes:
	if isinstance(t, bytes):
	return t
	return bytes(t.detach().cpu().contiguous().numpy().tobytes())

	@staticmethod
	def _pick_primary_weight_file(model_dir: str) -> Optional[str]:
	    """Return the first name in ``PRIMARY_WEIGHT_FILES`` that exists inside
	    ``model_dir``, or ``None`` when none does."""
	    candidates = PRIMARY_WEIGHT_FILES
	    # A one-element "tuple" written without its trailing comma is really a
	    # string; iterating it would test single characters as file names.
	    if isinstance(candidates, str):
	        candidates = (candidates,)
	    for name in candidates:
	        if os.path.exists(os.path.join(model_dir, name)):
	            return name
	    return None

	@dataclass
	class PackedRecord:
	    """One stored item: identifier, text, metadata and an optional embedding."""

	    # Item identifier.
	    id: str
	    # Item text.
	    text: str
	    # Free-form metadata mapping.
	    meta: Dict[str, Any]
	    # Embedding vector, or None when not attached.
	    embedding: Optional[np.ndarray] = field(default=None)


	@dataclass
	class PackedTreeSnapshot:
	    """Serialized form of a tree: a version number plus one byte blob per part."""

	    # Snapshot format version.
	    version: int
	    # Blob for the documents.
	    docs_blob: bytes
	    # Blob for the metadata.
	    metas_blob: bytes
	    # Blob for the ids.
	    ids_blob: bytes
	    # Blob for the embeddings.
	    embs_blob: bytes
	    # Optional additional payload; empty by default.
	    extra_blob: bytes = field(default=b"")


	class PackedTree:
	    """Named in-memory vector store with de-duplication, caching and snapshots.

	    Documents, metadata dicts and ids are kept in parallel lists; ``embs``
	    holds one float32 row per document and ``norm_embs`` the row-normalised
	    copy.  Texts are de-duplicated by the SHA-256 of their normalised form.
	    Search results are memoised in a small LRU cache that is dropped
	    whenever the content or metadata that feeds the scores changes.
	    """

	    def __init__(self, name: str, embed_fn: Callable[..., np.ndarray], cluster_k: int = 4):
	        """Create an empty tree.

	        Args:
	            name: Label used in error messages.
	            embed_fn: Callable mapping a text to an embedding array.
	            cluster_k: Upper bound on the number of k-means clusters.
	        """
	        self.name = name
	        self.embed_fn = embed_fn
	        self.cluster_k = int(cluster_k)

	        self.docs: List[str] = []
	        self.metas: List[Dict[str, Any]] = []
	        self.ids: List[str] = []
	        self.embs: np.ndarray = np.empty((0, 0), dtype=np.float32)
	        self.norm_embs: np.ndarray = np.empty((0, 0), dtype=np.float32)
	        self.id_to_idx: Dict[str, int] = {}
	        self.hash_to_id: Dict[str, str] = {}
	        self.query_cache: OrderedDict = OrderedDict()
	        self.cluster_cache: Dict[str, Any] = {}
	        self._lock = threading.RLock()
	        self._clusters_dirty = True

	    def __getstate__(self):
	        """Return picklable state; the ``RLock`` cannot be pickled, so drop it."""
	        state = self.__dict__.copy()
	        state["_lock"] = None
	        return state

	    def __setstate__(self, state):
	        """Restore pickled state and create a fresh lock."""
	        self.__dict__.update(state)
	        self._lock = threading.RLock()

	    @staticmethod
	    def norm_text(text: str) -> str:
	        """Whitespace-normalise (via ``_norm_ws``) and lower-case ``text``."""
	        return _norm_ws(text).lower()

	    @staticmethod
	    def text_hash(text: str) -> str:
	        """Hex SHA-256 of the normalised text, used as the de-duplication key."""
	        return hashlib.sha256(PackedTree.norm_text(text).encode("utf-8")).hexdigest()

	    def _cache_get(self, key):
	        """Return the cached value for ``key`` (marking it recently used) or ``None``."""
	        value = self.query_cache.get(key)
	        if value is not None:
	            self.query_cache.move_to_end(key)
	        return value

	    def _cache_put(self, key, value, max_size: int = 512):
	        """Insert ``key`` as most recent and evict the oldest entries beyond ``max_size``."""
	        self.query_cache[key] = value
	        self.query_cache.move_to_end(key)
	        while len(self.query_cache) > max_size:
	            self.query_cache.popitem(last=False)

	    def add(self, text: str, meta: Optional[Dict[str, Any]] = None, item_id: Optional[str] = None) -> str:
	        """Embed and store ``text``; return its id.

	        Returns ``""`` for blank text and the existing id when a document
	        with the same hash is already stored.

	        Raises:
	            ValueError: If the embedding width differs from the stored
	                matrix.  The tree is left untouched in that case.
	        """
	        text = _norm_ws(text)
	        if not text:
	            return ""
	        with self._lock:
	            meta = dict(meta or {})
	            item_id = item_id or meta.get("id") or str(uuid.uuid4())
	            doc_hash = meta.get("hash") or self.text_hash(text)
	            if doc_hash in self.hash_to_id:
	                return self.hash_to_id[doc_hash]

	            emb = np.asarray(self.embed_fn(text), dtype=np.float32).reshape(-1)
	            # Validate before touching any state so a bad embedding cannot
	            # leave docs/ids/maps longer than the embedding matrix.
	            if self.embs.size != 0 and self.embs.shape[1] != emb.shape[0]:
	                raise ValueError(f"Embedding dimension mismatch in tree '{self.name}': {emb.shape[0]} != {self.embs.shape[1]}")

	            self.id_to_idx[item_id] = len(self.docs)
	            self.hash_to_id[doc_hash] = item_id
	            meta.setdefault("id", item_id)
	            meta.setdefault("hash", doc_hash)
	            meta.setdefault("timestamp", _now_iso())
	            self.docs.append(text)
	            self.metas.append(meta)
	            self.ids.append(item_id)

	            row = emb.reshape(1, -1)
	            if self.embs.size == 0:
	                self.embs = row.astype(np.float32)
	            else:
	                self.embs = np.vstack([self.embs, row])

	            self.norm_embs = self._normalize_embeddings(self.embs)
	            self._clusters_dirty = True
	            # Cached result lists do not contain the new document.
	            self.query_cache.clear()
	            return item_id

	    def bulk_add(self, items: Sequence[Tuple[str, Dict[str, Any], Optional[str]]]) -> List[str]:
	        """Call :meth:`add` for every ``(text, meta, item_id)`` triple; return the ids."""
	        return [self.add(text, meta=meta, item_id=item_id) for text, meta, item_id in items]

	    def update_meta(self, item_id: str, patch: Mapping[str, Any]):
	        """Merge ``patch`` into the metadata of ``item_id`` (no-op for unknown ids)."""
	        with self._lock:
	            idx = self.id_to_idx.get(item_id)
	            if idx is None:
	                return
	            self.metas[idx].update(dict(patch))
	            self._clusters_dirty = True
	            # Scores depend on metadata ("importance"), so cached rankings
	            # may now be stale.
	            self.query_cache.clear()

	    def record_usage(self, item_id: str):
	        """Increment ``usage_count`` and stamp ``last_used`` for ``item_id``."""
	        with self._lock:
	            idx = self.id_to_idx.get(item_id)
	            if idx is None:
	                return
	            md = self.metas[idx]
	            md["usage_count"] = int(md.get("usage_count", 0)) + 1
	            md["last_used"] = _now_iso()

	    @staticmethod
	    def _compress(obj: Any) -> bytes:
	        """JSON-encode ``obj`` (UTF-8, ``_json_default`` fallback) and LZMA-compress it."""
	        payload = json.dumps(obj, ensure_ascii=False, default=_json_default).encode("utf-8")
	        return lzma.compress(payload, preset=9)

	    @staticmethod
	    def _decompress(blob: bytes, default: Any = None) -> Any:
	        """Inverse of :meth:`_compress`; returns ``default`` for empty or unreadable blobs."""
	        if not blob:
	            return default
	        try:
	            return json.loads(lzma.decompress(blob).decode("utf-8"))
	        except Exception:
	            # Best effort by design: a corrupt blob restores as ``default``.
	            return default

	    def snapshot(self) -> "PackedTreeSnapshot":
	        """Serialise the tree into a :class:`PackedTreeSnapshot`.

	        Embeddings are stored as float16 to reduce size, so a round trip
	        through :meth:`restore` loses some precision.
	        """
	        with self._lock:
	            embs_blob = b""
	            if self.embs.size:
	                embs_blob = lzma.compress(self.embs.astype(np.float16).tobytes(), preset=9)
	            extra = {
	                "shape": list(self.embs.shape),
	                "dtype": "float16",
	                "cluster_k": self.cluster_k,
	                "hash_to_id": self.hash_to_id,
	                "id_to_idx": self.id_to_idx,
	            }
	            return PackedTreeSnapshot(
	                version=1,
	                docs_blob=self._compress(self.docs),
	                metas_blob=self._compress(self.metas),
	                ids_blob=self._compress(self.ids),
	                embs_blob=embs_blob,
	                extra_blob=self._compress(extra),
	            )

	    def restore(self, snap: "PackedTreeSnapshot"):
	        """Replace the tree's content with ``snap`` and reset all caches."""
	        with self._lock:
	            self.docs = self._decompress(snap.docs_blob, default=[])
	            self.metas = self._decompress(snap.metas_blob, default=[])
	            self.ids = self._decompress(snap.ids_blob, default=[])
	            extra = self._decompress(snap.extra_blob, default={}) or {}
	            shape = tuple(extra.get("shape") or [0, 0])
	            self.cluster_k = int(extra.get("cluster_k", self.cluster_k))
	            self.hash_to_id = dict(extra.get("hash_to_id", {}))
	            # JSON round-trips may turn ints into other number types.
	            self.id_to_idx = {k: int(v) for k, v in dict(extra.get("id_to_idx", {})).items()}

	            if snap.embs_blob and shape and shape[0] > 0 and shape[1] > 0:
	                raw = lzma.decompress(snap.embs_blob)
	                arr = np.frombuffer(raw, dtype=np.float16).reshape(shape).astype(np.float32)
	                self.embs = arr
	                self.norm_embs = self._normalize_embeddings(arr)
	            else:
	                self.embs = np.empty((0, 0), dtype=np.float32)
	                self.norm_embs = np.empty((0, 0), dtype=np.float32)

	            self.query_cache = OrderedDict()
	            self.cluster_cache = {}
	            self._clusters_dirty = True

	    @staticmethod
	    def _normalize_embeddings(embs: np.ndarray) -> np.ndarray:
	        """Scale every row to unit L2 norm; all-zero rows are left as zeros."""
	        if embs.size == 0:
	            return embs
	        norms = np.linalg.norm(embs, axis=1, keepdims=True)
	        norms[norms == 0] = 1.0  # avoid division by zero
	        return embs / norms

	    @staticmethod
	    def _cosine_scores(query_emb: np.ndarray, matrix: np.ndarray) -> np.ndarray:
	        """Cosine similarity between ``query_emb`` and every row of ``matrix``."""
	        if matrix.size == 0:
	            return np.array([], dtype=np.float32)
	        q = query_emb.astype(np.float32).reshape(1, -1)
	        qn = q / np.maximum(np.linalg.norm(q, axis=1, keepdims=True), 1e-8)
	        mn = PackedTree._normalize_embeddings(matrix.astype(np.float32))
	        return (qn @ mn.T)[0]

	    def _build_clusters(self):
	        """(Re)compute ``cluster_cache`` with k-means over the normalised rows.

	        Falls back to one cluster containing everything when ``KMeans`` is
	        unavailable, fewer than two rows exist, or ``cluster_k`` <= 1.
	        """
	        everything = {0: {"idxs": list(range(len(self.docs)))}}
	        if KMeans is None or self.embs.shape[0] < 2:
	            self.cluster_cache = {"centers": None, "clusters": everything}
	            self._clusters_dirty = False
	            return

	        k = min(self.cluster_k, self.embs.shape[0])
	        if k <= 1:
	            self.cluster_cache = {"centers": self.norm_embs[:1], "clusters": everything}
	            self._clusters_dirty = False
	            return

	        km = KMeans(n_clusters=k, random_state=0, n_init="auto")
	        labels = km.fit_predict(self.norm_embs)
	        clusters: Dict[int, Dict[str, Any]] = {i: {"idxs": []} for i in range(k)}
	        for idx, lab in enumerate(labels):
	            clusters[int(lab)]["idxs"].append(idx)
	        self.cluster_cache = {
	            "centers": km.cluster_centers_.astype(np.float32),
	            "clusters": clusters,
	        }
	        self._clusters_dirty = False

	    def _make_hit(self, idx: int, raw: float, doc_norm: str, q_tokens: set, use_keywords: bool) -> Dict[str, Any]:
	        """Build one result dict for document ``idx``.

	        The final score blends cosine similarity (0.75), token overlap with
	        the query (0.2) and the ``importance`` metadata (0.05, default 0.5),
	        clamped to ``[0, 1]``.
	        """
	        md = self.metas[idx]
	        kw = 0.0
	        if use_keywords:
	            d_tokens = set(doc_norm.split())
	            kw = len(q_tokens.intersection(d_tokens)) / max(1.0, (len(q_tokens) + len(d_tokens)) / 2.0)
	        final = 0.75 * raw + 0.2 * kw + 0.05 * float(md.get("importance", 0.5))
	        return {
	            "id": self.ids[idx],
	            "passage": self.docs[idx],
	            "raw_similarity": raw,
	            "score": max(0.0, min(1.0, final)),
	            "metadata": md,
	        }

	    def search(self, query: str, top_k: int = 5, min_score: float = 0.0, hybrid: bool = True, use_clusters: bool = False) -> List[Dict[str, Any]]:
	        """Return up to ``top_k`` documents ranked by cosine similarity.

	        Args:
	            query: Free text; blank queries yield ``[]``.
	            top_k: Maximum number of hits; values <= 0 yield ``[]``.
	            min_score: Minimum raw cosine similarity for a hit.
	            hybrid: Add the keyword-overlap term to the blended score.
	            use_clusters: Only refreshes the cluster cache when it is
	                dirty; ranking itself is always a full scan.

	        Returns:
	            A list of dicts with ``id``, ``passage``, ``raw_similarity``,
	            ``score`` and ``metadata``.
	        """
	        query = _norm_ws(query)
	        if not query or top_k <= 0:
	            return []
	        qkey = (query, top_k, min_score, hybrid, use_clusters)
	        cached = self._cache_get(qkey)
	        if cached is not None:
	            # Hand out a copy so callers cannot mutate the cached list.
	            return list(cached)

	        if self.embs.size == 0:
	            return []

	        q_emb = np.asarray(self.embed_fn(query), dtype=np.float32).reshape(-1)
	        scores = self._cosine_scores(q_emb, self.embs)
	        if scores.size == 0:
	            return []

	        q_tokens = set(self.norm_text(query).split())
	        use_keywords = bool(hybrid and q_tokens)
	        results: List[Dict[str, Any]] = []
	        seen = set()

	        for idx in np.argsort(scores)[::-1]:
	            idx = int(idx)
	            score = float(scores[idx])
	            if score < min_score:
	                # Scores are visited in descending order: nothing later
	                # can reach the threshold either.
	                break
	            doc_norm = self.norm_text(self.docs[idx])
	            if doc_norm in seen:
	                continue
	            seen.add(doc_norm)
	            results.append(self._make_hit(idx, score, doc_norm, q_tokens, use_keywords))
	            if len(results) >= top_k:
	                break

	        if use_clusters and self._clusters_dirty:
	            self._build_clusters()

	        self._cache_put(qkey, list(results))
	        return results

	    def retrieve_by_semantics(self, query: str, num_clusters: int = 2, top_k_per_cluster: int = 3, min_score: float = 0.0) -> List[Dict[str, Any]]:
	        """Retrieve hits from the ``num_clusters`` clusters closest to ``query``.

	        Takes up to ``top_k_per_cluster`` documents from each selected
	        cluster.  Falls back to :meth:`search` when no cluster centres are
	        available, and tops up from :meth:`search` when fewer than
	        ``top_k_per_cluster`` hits were found.
	        """
	        query = _norm_ws(query)
	        if not query:
	            return []
	        if self.embs.size == 0:
	            return []
	        if self._clusters_dirty:
	            self._build_clusters()

	        limit = top_k_per_cluster * num_clusters
	        centers = self.cluster_cache.get("centers")
	        clusters = self.cluster_cache.get("clusters") or {}
	        if centers is None:
	            return self.search(query, top_k=limit, min_score=min_score, hybrid=True)

	        q_vec = np.asarray(self.embed_fn(query), dtype=np.float32).reshape(-1)
	        center_sims = self._cosine_scores(q_vec, centers)
	        top_cluster_ids = np.argsort(center_sims)[::-1][:min(num_clusters, len(center_sims))]
	        q_tokens = set(self.norm_text(query).split())  # loop-invariant
	        results: List[Dict[str, Any]] = []
	        seen = set()

	        for cid in top_cluster_ids:
	            idxs = clusters.get(int(cid), {}).get("idxs", [])
	            if not idxs:
	                continue
	            sims = self._cosine_scores(q_vec, self.norm_embs[idxs])
	            for local_idx in np.argsort(sims)[::-1][:top_k_per_cluster]:
	                global_idx = idxs[int(local_idx)]
	                raw = float(sims[int(local_idx)])
	                if raw < min_score:
	                    continue
	                doc_norm = self.norm_text(self.docs[global_idx])
	                if doc_norm in seen:
	                    continue
	                seen.add(doc_norm)
	                results.append(self._make_hit(global_idx, raw, doc_norm, q_tokens, True))
	                if len(results) >= limit:
	                    break

	        if len(results) < top_k_per_cluster:
	            for item in self.search(query, top_k=limit, min_score=min_score, hybrid=True):
	                key = self.norm_text(item["passage"])
	                if key not in seen:
	                    seen.add(key)
	                    results.append(item)
	        return results


	def _json_default(obj: Any):
	if isinstance(obj, (np.integer, np.floating)):
	return obj.item()
	if isinstance(obj, np.ndarray):
	return obj.tolist()
	if isinstance(obj, (set, tuple)):
	return list(obj)
	if dataclasses.is_dataclass(obj):
	return asdict(obj)
	if isinstance(obj, bytes):
	return base64.b64encode(obj).decode("ascii")
	if isinstance(obj, Path):
	return str(obj)
	raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


	def _extract_response_text_from_result(result: Any) -> str:
	def coerce_to_str(v):
	if isinstance(v, str):
	return v
	if isinstance(v, (list, tuple)):
	pieces = []
	for x in v:
	if isinstance(x, str) and x.strip():
	pieces.append(x)
	elif isinstance(x, dict):
	for k in ("content", "response", "assistant", "final"):
	if k in x and isinstance(x[k], str) and x[k].strip():
	pieces.append(x[k])
	break
	else:
	for val in x.values():
	if isinstance(val, str) and val.strip():
	pieces.append(val)
	else:
	try:
	pieces.append(str(x))
	except Exception:
	pass
	return "\n".join(pieces)
	if isinstance(v, dict):
	for key in ("response", "assistant", "final", "content"):
	val = v.get(key)
	if isinstance(val, str) and val.strip():
	return val
	if isinstance(val, (list, tuple, dict)):
	s = coerce_to_str(val)
	if s:
	return s
	vals = [str(x) for x in v.values() if isinstance(x, str) and x.strip()]
	if vals:
	return "\n".join(vals)
	try:
	return json.dumps(v)
	except Exception:
	return str(v)
	try:
	return str(v)
	except Exception:
	return ""

	if isinstance(result, dict):
	for key in ("blocks", "response", "assistant", "final", "content"):
	if key in result and result[key]:
	return coerce_to_str(result[key])
	vals = [v for v in result.values() if isinstance(v, (str, list, dict)) and v]
	if vals:
	return coerce_to_str(vals[0])
	return json.dumps(result)
	return coerce_to_str(result)


	class DesktopControl:
	    """Best-effort desktop automation helpers.

	    Optional third-party modules are imported lazily.  The mouse/keyboard
	    helpers go through the Win32 ``user32`` API and silently do nothing
	    when that API is unavailable (any non-Windows platform).
	    """

	    def __init__(self):
	        # Filled by _lazy_import(): module name -> module or None.
	        self._available = None

	    def _lazy_import(self):
	        """Import the optional helper modules once and cache the result.

	        Returns:
	            Dict mapping each module name to the imported module, or
	            ``None`` when importing it failed.
	        """
	        if self._available is not None:
	            return self._available
	        mods = {}
	        for name in ("pyautogui", "keyboard", "mouse", "psutil", "win32gui", "pygetwindow", "ctypes"):
	            try:
	                mods[name] = importlib.import_module(name)
	            except Exception:
	                mods[name] = None
	        self._available = mods
	        return mods

	    def _user32(self):
	        """Return ``ctypes.windll.user32`` or ``None`` when it does not exist.

	        ``ctypes`` imports on every platform but only exposes ``windll`` on
	        Windows, so checking the module for ``None`` is not enough.
	        """
	        ctypes_mod = self._lazy_import().get("ctypes")
	        windll = getattr(ctypes_mod, "windll", None)
	        return getattr(windll, "user32", None)

	    def get_location_string(self) -> str:
	        """Return ``"City/State/Country"`` from an IP lookup via ``geocoder``.

	        Any failure (missing package, no network) yields the all-unknown
	        placeholder string.
	        """
	        try:
	            import geocoder  # type: ignore
	            g = geocoder.ip("me")
	            city = g.city or "UnknownCity"
	            state = g.state or "UnknownState"
	            country = g.country or "UnknownCountry"
	            return f"{city}/{state}/{country}"
	        except Exception:
	            return "UnknownCity/UnknownState/UnknownCountry"

	    def get_time_string(self) -> str:
	        """Return local time as ``"dd/mm/YYYY\\nHH:MM:SS/am|pm"`` (12-hour clock)."""
	        now = datetime.now()
	        date_str = now.strftime("%d/%m/%Y")
	        time_str = now.strftime("%I:%M:%S/%p").lower()
	        return f"{date_str}\n{time_str}"

	    def is_desktop_active(self) -> bool:
	        """``True`` when the foreground window is the desktop (needs ``win32gui``)."""
	        win32gui = self._lazy_import().get("win32gui")
	        if win32gui is None:
	            return False
	        return win32gui.GetForegroundWindow() == win32gui.GetDesktopWindow()

	    def is_program_active(self, program_name: str) -> bool:
	        """``True`` when ``program_name`` is running and named in the active window title.

	        Needs ``pygetwindow`` and ``psutil``; returns ``False`` without them.
	        """
	        mods = self._lazy_import()
	        gw = mods.get("pygetwindow")
	        psutil_mod = mods.get("psutil")
	        if gw is None or psutil_mod is None:
	            return False
	        active_window = gw.getActiveWindow()
	        if not active_window:
	            return False
	        wanted = program_name.lower()
	        active_title = (active_window.title or "").lower()
	        for process in psutil_mod.process_iter(["pid", "name"]):
	            if (process.info.get("name") or "").lower() == wanted:
	                return wanted in active_title
	        return False

	    def fast_move(self, x, y):
	        """Move the cursor to ``(x, y)``; no-op without ``user32``."""
	        user32 = self._user32()
	        if user32 is None:
	            return
	        user32.SetCursorPos(x, y)

	    def scroll_mouse(self, delta):
	        """Scroll the wheel by ``delta`` notches; no-op without ``user32``."""
	        user32 = self._user32()
	        if user32 is None:
	            return
	        # 0x0800 = MOUSEEVENTF_WHEEL; one notch is 120 units.
	        user32.mouse_event(0x0800, 0, 0, int(delta * 120), 0)

	    def press_special_key(self, key):
	        """Tap a media/volume key by name; unknown names and missing ``user32`` are no-ops."""
	        user32 = self._user32()
	        if user32 is None:
	            return
	        special_keys = {
	            "volume up": 0xAF,
	            "volume down": 0xAE,
	            "volume mute": 0xAD,
	            "play/pause media": 0xB3,
	            "next track": 0xB0,
	            "prev track": 0xB1,
	        }
	        vk_code = special_keys.get(key)
	        if vk_code is None:
	            return
	        user32.keybd_event(vk_code, 0, 0, 0)  # key down
	        user32.keybd_event(vk_code, 0, 2, 0)  # key up (KEYEVENTF_KEYUP)
	        time.sleep(0.1)

	    def minimize_all_windows(self):
	        """Send Win+M to minimise all windows; no-op without ``user32``."""
	        user32 = self._user32()
	        if user32 is None:
	            return
	        # 0x5B = left Windows key, 0x4D = "M"; flag 2 releases the key.
	        for vk_code, flags in ((0x5B, 0), (0x4D, 0), (0x4D, 2), (0x5B, 2)):
	            user32.keybd_event(vk_code, 0, flags, 0)
	        time.sleep(1)

	    def run_pyautogui_command(self, command_name: str):
	        """Call the zero-argument ``pyautogui`` function named ``command_name``.

	        Raises:
	            RuntimeError: If ``pyautogui`` could not be imported.
	        """
	        pyautogui = self._lazy_import().get("pyautogui")
	        if pyautogui is None:
	            raise RuntimeError("pyautogui is not installed")
	        getattr(pyautogui, command_name)()


	@dataclass
	class ActionDecision:
	    """One parsed action from a decision payload.

	    Only ``type`` is required; the other fields default to ``None``,
	    ``False`` or a fresh empty container.
	    """

	    # Field order defines the generated ``__init__`` signature.
	    type: str
	    command_name: Optional[str] = None
	    command_description: Optional[str] = None
	    command_text: Optional[str] = None
	    memory_access: List[str] = field(default_factory=list)
	    source: Optional[str] = None
	    profile: Optional[str] = None
	    query: Optional[str] = None
	    sufficient_to_answer: bool = False
	    parameters: Dict[str, Any] = field(default_factory=dict)


	class CommandRegistry:
	    """Registry and dispatcher for basic, shortcut and custom commands.

	    * basic: named callables matched by substring against the input text.
	    * shortcut: files below ``shortcuts_dir`` whose stem occurs in the
	      text; matching files are opened with the platform's default handler.
	    * custom: recorded key/mouse event sequences triggered by a phrase.
	    """

	    def __init__(self, owner: "GATOR"):
	        self.owner = owner
	        self.commands: Dict[str, Dict[str, Any]] = {}
	        self.custom_commands: Dict[str, Dict[str, Any]] = {}
	        self.shortcuts_dir = "./Mods/COMMANDS"
	        self._lock = threading.RLock()

	    def __getstate__(self):
	        """Return picklable state; the ``RLock`` cannot be pickled, so drop it."""
	        state = self.__dict__.copy()
	        state["_lock"] = None
	        return state

	    def __setstate__(self, state):
	        """Restore pickled state and create a fresh lock."""
	        self.__dict__.update(state)
	        self._lock = threading.RLock()

	    def register_command(self, name: str, description: str, action: Optional[Callable] = None, command_type: str = "basic"):
	        """Create or replace the basic command ``name``."""
	        with self._lock:
	            self.commands[name] = {
	                "name": name,
	                "description": description,
	                "action": action,
	                "type": command_type,
	            }

	    def update_command(self, name: str, description: Optional[str] = None, action: Optional[Callable] = None, command_type: Optional[str] = None):
	        """Patch the given fields of command ``name``, creating it when missing."""
	        with self._lock:
	            entry = self.commands.setdefault(
	                name, {"name": name, "description": "", "action": None, "type": "basic"}
	            )
	            if description is not None:
	                entry["description"] = description
	            if action is not None:
	                entry["action"] = action
	            if command_type is not None:
	                entry["type"] = command_type

	    def register_custom_command(self, command_name: str, phrase: str, description: str, actions: Optional[List[Dict[str, Any]]] = None):
	        """Store a custom command and mirror it into ``owner.command_tree``."""
	        with self._lock:
	            self.custom_commands[command_name] = {
	                "phrase": phrase,
	                "description": description,
	                "actions": actions or [],
	            }
	            self.owner.command_tree.add_command_branch(
	                command_name=phrase,
	                command_action=command_name,
	                command_type="custom",
	                description=description,
	            )

	    def check_custom_commands(self, text: str, matched_commands: Optional[List[str]] = None):
	        """Collect names of custom commands whose phrase occurs in ``text``.

	        Matches are appended to ``matched_commands`` in place when a list
	        is supplied.

	        Returns:
	            The list of matches, or ``False`` when there are none.
	        """
	        # ``is None`` rather than ``or``: an empty list passed by the caller
	        # must be filled in place, not replaced.
	        if matched_commands is None:
	            matched_commands = []
	        text_lower = (text or "").lower()
	        for command_name, data in self.custom_commands.items():
	            phrase = (data.get("phrase") or "").lower()
	            if phrase and phrase in text_lower:
	                matched_commands.append(command_name)
	        return matched_commands if matched_commands else False

	    def check_basic_commands(self, text: str, matched_commands: Optional[List[Tuple[Callable, List[Any]]]] = None):
	        """Append ``(action, [])`` to ``matched_commands`` for every basic command named in ``text``.

	        Returns:
	            ``True`` when ``matched_commands`` is non-empty afterwards.
	        """
	        # See check_custom_commands: keep the caller's list object so
	        # process_commands actually receives the matches.
	        if matched_commands is None:
	            matched_commands = []
	        text_lower = (text or "").lower()
	        for command_dict in self.commands.values():
	            action = command_dict.get("action")
	            if action is None:
	                continue
	            triggers = command_dict.get("commands") or [command_dict.get("name")]
	            for cmd in triggers:
	                if cmd and cmd.lower() in text_lower:
	                    matched_commands.append((action, []))
	        return bool(matched_commands)

	    @staticmethod
	    def _open_file(path):
	        """Open ``path`` with the platform's default handler."""
	        starter = getattr(os, "startfile", None)  # Windows only
	        if starter is not None:
	            starter(path)
	            return
	        opener = "open" if sys.platform == "darwin" else "xdg-open"
	        subprocess.Popen([opener, path])

	    def _file_opener(self):
	        """Callable stored in shortcut action tuples (``os.startfile`` when it exists)."""
	        return getattr(os, "startfile", None) or self._open_file

	    def check_shortcuts(self, text: str, shortcut_dir: Optional[str] = None):
	        """Return ``(opener, [path])`` for every shortcut file whose stem occurs in ``text``.

	        ``shortcut_dir`` (default ``self.shortcuts_dir``) is created when
	        missing and scanned recursively.
	        """
	        shortcut_dir = shortcut_dir or self.shortcuts_dir
	        os.makedirs(shortcut_dir, exist_ok=True)
	        opener = self._file_opener()
	        actions = []
	        text_lower = (text or "").lower()
	        for entry in os.listdir(shortcut_dir):
	            entry_path = os.path.join(shortcut_dir, entry)
	            if os.path.isdir(entry_path):
	                actions.extend(self._check_folder(entry_path, text_lower))
	            else:
	                name_no_ext = os.path.splitext(entry)[0].lower()
	                if os.path.isfile(entry_path) and name_no_ext in text_lower:
	                    actions.append((opener, [entry_path]))
	        return actions

	    def _check_folder(self, base_dir, text_lower):
	        """Recursive part of :meth:`check_shortcuts` for one sub-directory."""
	        opener = self._file_opener()
	        matches = []
	        for entry in os.listdir(base_dir):
	            entry_path = os.path.join(base_dir, entry)
	            name_no_ext = os.path.splitext(entry)[0].lower()
	            if os.path.isfile(entry_path) and name_no_ext in text_lower:
	                matches.append((opener, [entry_path]))
	            elif os.path.isdir(entry_path):
	                matches.extend(self._check_folder(entry_path, text_lower))
	        return matches

	    @staticmethod
	    def _resolve_args(args, argument_dictionary):
	        """Replace ``"{key}"`` placeholders in ``args`` using ``argument_dictionary``."""
	        resolved = []
	        for arg in args or []:
	            is_placeholder = isinstance(arg, str) and arg.startswith("{") and arg.endswith("}")
	            if argument_dictionary and is_placeholder:
	                resolved.append(argument_dictionary.get(arg.strip("{}"), ""))
	            else:
	                resolved.append(arg)
	        return resolved

	    def execute_command(self, commands, command_executed_tags, argument_dictionary=None):
	        """Call every ``(action, args)`` pair and log a tag per call.

	        String arguments of the form ``"{key}"`` are looked up in
	        ``argument_dictionary`` (missing keys become ``""``); other
	        arguments are passed through unchanged.
	        """
	        for action, args in commands:
	            action(*self._resolve_args(args, argument_dictionary))
	            command_executed_tags.append(f"Executed action: {getattr(action, '__name__', str(action))}")

	    def execute_shortcut(self, actions, command_executed_tags):
	        """Open the file of every ``(opener, [path])`` pair, logging the outcome."""
	        for _opener, args in actions:
	            file_path = os.path.abspath(args[0])
	            if not os.path.exists(file_path):
	                command_executed_tags.append(f"Path not found: {file_path}")
	                continue
	            if os.path.isdir(file_path):
	                command_executed_tags.append(f"Skipped directory: {file_path}")
	                continue
	            try:
	                self._open_file(file_path)
	                command_executed_tags.append(f"Opened: {file_path}")
	                continue
	            except Exception:
	                pass
	            # ``cmd /c start`` only exists on Windows.  Elsewhere Popen would
	            # "succeed" while the shell merely fails to find ``cmd``.
	            if os.name != "nt":
	                command_executed_tags.append(f"Failed to open: {file_path}")
	                continue
	            try:
	                subprocess.Popen(["cmd", "/c", "start", "", file_path], shell=True)
	                command_executed_tags.append(f"Opened via cmd start: {file_path}")
	            except Exception:
	                command_executed_tags.append(f"Failed to open: {file_path}")

	    def execute_custom_command(self, command_name):
	        """Replay the recorded events of custom command ``command_name``.

	        Key and mouse-button events go through ``pyautogui`` (imported on
	        first use); moves and scrolls go through ``owner.desktop``.
	        Unknown commands, empty recordings and unknown event kinds are
	        ignored.
	        """
	        payload = self.custom_commands.get(command_name)
	        if not payload:
	            return
	        for action in payload.get("actions") or []:
	            kind = action.get("event")
	            if kind in ("key_down", "key_up", "mouse_down", "mouse_up"):
	                import pyautogui  # type: ignore
	                if kind == "key_down":
	                    pyautogui.keyDown(action["key"])
	                elif kind == "key_up":
	                    pyautogui.keyUp(action["key"])
	                elif kind == "mouse_down":
	                    pyautogui.mouseDown(button=action["button"])
	                else:
	                    pyautogui.mouseUp(button=action["button"])
	            elif kind == "mouse_move":
	                self.owner.desktop.fast_move(action["x"], action["y"])
	            elif kind == "mouse_scroll":
	                self.owner.desktop.scroll_mouse(action["delta"])

	    def process_commands(self, command, command_type="shortcut", argument_dictionary=None):
	        """Match ``command`` against one command family and run the matches.

	        Args:
	            command: Input text.
	            command_type: ``"basic"``, ``"shortcut"`` or ``"custom"``.
	            argument_dictionary: Placeholder values for basic commands.

	        Returns:
	            The executed entries: ``(action, args)`` tuples for
	            basic/shortcut commands, command names for custom ones.
	        """
	        command_executed_tags = []
	        executed_actions = []

	        if command_type == "basic":
	            basic_matches = []
	            if self.check_basic_commands(command, basic_matches):
	                self.execute_command(basic_matches, command_executed_tags, argument_dictionary)
	                executed_actions.extend(basic_matches)

	        elif command_type == "shortcut":
	            shortcut_actions = self.check_shortcuts(command, self.shortcuts_dir)
	            if shortcut_actions:
	                self.execute_shortcut(shortcut_actions, command_executed_tags)
	                executed_actions.extend(shortcut_actions)

	        elif command_type == "custom":
	            found = self.check_custom_commands(command)
	            if found:
	                for cmd_name in found:
	                    self.execute_custom_command(cmd_name)
	                    executed_actions.append(cmd_name)

	        return executed_actions


	# ---------------------------------------------------------------------------
	# GATOR module
	# ---------------------------------------------------------------------------
	class GATOR(nn.Module):

	STATE_VERSION = 1

	def __init__(
	    self,
	    lm_checkpoint_path: str = "LM.pt",
	    embedder_name: str = "second-state/jina-embeddings-v3-GGUF",
	    embedder_local_dir: str = os.path.join("models", "jinaai"),
	    embedder_filename: str = GGUF_EMBED_FILENAME,
	    device: str = "cpu",
	    warm_on_start: bool = True,
	    compression: str = "lzma",
	    store_dtype: str = "float16",
	    cluster_k: int = 4,
	    auto_load_lm: bool = True,
	    strict_lm: bool = True,
	    embedder_pack: Optional[Dict[str, Any]] = None,
	):
	    """Build the module: language model, GGUF embedder and memory trees.

	    Args:
	        lm_checkpoint_path: Checkpoint handed to ``_load_lm``.
	        embedder_name: Recorded in ``config`` and on the instance.
	        embedder_local_dir: First directory searched for the GGUF file.
	        embedder_filename: File name of the GGUF embedder.
	        device: Stored as ``device_name``.
	        warm_on_start: Call ``self.warmup()`` at the end of construction.
	        compression: Stored on the instance and in ``config``.
	        store_dtype: Stored on the instance and in ``config``.
	        cluster_k: Cluster count given to every ``PackedTree``.
	        auto_load_lm: Forwarded to ``_load_lm`` as ``auto_load``.
	        strict_lm: Stored on the instance and in ``config``.
	        embedder_pack: Pre-built pack with ``gguf_bytes``; when usable the
	            embedder is restored from it instead of from disk.
	    """
	    super().__init__()

	    self.config = dict(
	        lm_checkpoint_path=lm_checkpoint_path,
	        embedder_name=embedder_name,
	        embedder_local_dir=embedder_local_dir,
	        embedder_filename=embedder_filename,
	        device=device,
	        warm_on_start=warm_on_start,
	        compression=compression,
	        store_dtype=store_dtype,
	        cluster_k=cluster_k,
	        auto_load_lm=auto_load_lm,
	        strict_lm=strict_lm,
	    )

	    self.device_name = device
	    self.compression = compression
	    self.store_dtype = store_dtype
	    self.cluster_k = int(cluster_k)
	    self.strict_lm = strict_lm
	    self._lock = threading.RLock()
	    self._snapshot_cache: Dict[str, bytes] = {}
	    self._runtime_cache: OrderedDict = OrderedDict()
	    self._runtime_cache_max = 16
	    self._last_route: Dict[str, Any] = {}
	    self._last_response: str = ""
	    self._last_plan: Dict[str, Any] = {}

	    self.desktop = DesktopControl()
	    self.command_registry = CommandRegistry(self)
	    self.lm = self._load_lm(lm_checkpoint_path, auto_load=auto_load_lm)

	    self.embedder_name = embedder_name
	    self.embedder_local_dir = embedder_local_dir
	    self.embedder_filename = embedder_filename

	    # Prefer a caller-supplied pack; otherwise find the GGUF file on disk.
	    if embedder_pack is None or embedder_pack.get("gguf_bytes") is None:
	        self.embedder_path = self._resolve_local_embedder_gguf(embedder_local_dir, embedder_filename)
	        self.embedder_pack = self._load_embedder_pack(self.embedder_path)
	    else:
	        self.embedder_pack = embedder_pack
	        self.embedder_path = embedder_pack.get("gguf_source_path", "")
	    self.embedder = self._restore_embedder_from_pack(self.embedder_pack)

	    self.embedder_tokenizer = None
	    # One dummy embedding reveals the embedder's output width.
	    probe = self._embed_raw(["__gator_probe__"], task="retrieval.passage")
	    if probe.ndim == 2 and probe.shape[-1] > 0:
	        self.embed_dim = int(probe.shape[-1])
	    else:
	        self.embed_dim = 1024

	    tree_names = (
	        "knowledge",
	        "conversation",
	        "profile_user",
	        "profile_bot",
	        "commands",
	        "assets",
	        "telemetry",
	    )
	    self._store: Dict[str, PackedTree] = {
	        tree_name: PackedTree(tree_name, self.embed, cluster_k=self.cluster_k)
	        for tree_name in tree_names
	    }

	    self._command_phrases: Dict[str, Dict[str, Any]] = {}
	    self._warm_on_start = warm_on_start
	    if warm_on_start:
	        self.warmup()

	@staticmethod
	def _bytes_to_uint8_tensor(data: bytes) -> torch.Tensor:
	arr = np.frombuffer(data, dtype=np.uint8)
	return torch.from_numpy(arr.copy())

	@staticmethod
	def _uint8_tensor_to_bytes(t: torch.Tensor) -> bytes:
	return bytes(t.detach().cpu().contiguous().numpy().tobytes())

	def __getstate__(self):
	state = self.__dict__.copy()
	state["embedder"] = None
	state["embedder_tokenizer"] = None
	state["embedder_pack"] = self._snapshot_embedder_pack()
	state["lm"] = self._snapshot_lm_handle()
	state["_lock"] = None
	return state

	def __setstate__(self, state):
	    """Rebuild runtime-only members after unpickling.

	    Restores the lock, replaces ``desktop``/``command_registry`` when they
	    are not of the expected type, re-creates the embedder from its pack
	    when it is ``None``, and builds empty trees when ``_store`` is absent.
	    """
	    self.__dict__.update(state)
	    self._lock = threading.RLock()
	    if not isinstance(self.desktop, DesktopControl):
	        self.desktop = DesktopControl()
	    if not isinstance(self.command_registry, CommandRegistry):
	        self.command_registry = CommandRegistry(self)
	    if self.embedder is None:
	        self.embedder = self._restore_embedder_from_pack(self.embedder_pack)
	    self.embedder_tokenizer = None
	    self.lm = self._restore_lm_handle(self.lm)
	    if hasattr(self, "_store"):
	        return
	    tree_names = (
	        "knowledge",
	        "conversation",
	        "profile_user",
	        "profile_bot",
	        "commands",
	        "assets",
	        "telemetry",
	    )
	    self._store = {
	        tree_name: PackedTree(tree_name, self.embed, cluster_k=self.cluster_k)
	        for tree_name in tree_names
	    }

	def _load_lm(self, lm_checkpoint_path: str, auto_load: bool = True):
	if not auto_load:
	return None
	if load_packedlm is None:
	raise RuntimeError("PackedLM.load_packedlm is unavailable. Import PackedLM before GATOR.")
	if not os.path.exists(lm_checkpoint_path):
	raise FileNotFoundError(f"LM checkpoint not found: {lm_checkpoint_path}")
	return load_packedlm(lm_checkpoint_path)

	def _snapshot_lm_handle(self):
	return self.lm

	def _restore_lm_handle(self, packed):
	return packed

	def _resolve_local_embedder_gguf(self, local_dir: str, embedder_filename: str) -> str:
	candidates = [
	Path(local_dir).resolve() if local_dir else None,
	Path("models").resolve(),
	(Path("models") / "jinaai").resolve(),
	(Path("models") / "jinaai" / "jina-embeddings-v3").resolve(),
	]
	candidates = [p for p in candidates if p is not None]

	for root in candidates:
	if not root.exists():
	continue

	direct = root / embedder_filename
	if direct.is_file():
	return str(direct)

	for p in root.rglob(embedder_filename):
	if p.is_file():
	return str(p)

	raise FileNotFoundError(
	f"Could not find {embedder_filename} under: {[str(p) for p in candidates]}"
	)

	def _load_embedder_pack(self, gguf_path: str) -> Dict[str, Any]:
	pack_path = str(Path(gguf_path).with_suffix(Path(gguf_path).suffix + ".pt"))

	if os.path.exists(pack_path):
	try:
	pack = torch.load(pack_path, map_location="cpu", weights_only=False)
	if isinstance(pack, dict) and pack.get("gguf_bytes") is not None:
	return pack
	except Exception:
	try:
	os.remove(pack_path)
	except Exception:
	pass

	raw = Path(gguf_path).read_bytes()
	pack = {
	"gguf_filename": Path(gguf_path).name,
	"gguf_bytes": self._bytes_to_uint8_tensor(raw),
	"gguf_source_path": str(Path(gguf_path).resolve()),
	}

	try:
	torch.save(pack, pack_path, pickle_protocol=5)
	except Exception:
	pass

	return pack

	def _restore_embedder_from_pack(self, pack: Dict[str, Any]):
	if not pack or pack.get("gguf_bytes") is None:
	raise RuntimeError("Embedder GGUF pack is missing")

	tmp_dir = tempfile.mkdtemp(prefix="gator_embedder_")
	try:
	gguf_path = Path(tmp_dir) / pack["gguf_filename"]
	gguf_path.write_bytes(self._uint8_tensor_to_bytes(pack["gguf_bytes"]))

	llm = Llama(
	model_path=str(gguf_path),
	embedding=True,
	verbose=False,
	n_ctx=8192,
	use_mmap=False,
	use_mlock=False,
	)
	return llm
	finally:
	shutil.rmtree(tmp_dir, ignore_errors=True)

	def _load_embedder(self, model_name: str, local_dir: str):
	gguf_path = self._resolve_local_embedder_gguf(local_dir, self.embedder_filename)
	pack = self._load_embedder_pack(gguf_path)
	embedder = self._restore_embedder_from_pack(pack)
	return embedder, None, pack

	def _snapshot_embedder_pack(self) -> Dict[str, Any]:
	if isinstance(self.embedder_pack, dict) and self.embedder_pack.get("gguf_bytes") is not None:
	return {
	"gguf_filename": self.embedder_pack["gguf_filename"],
	"gguf_bytes": self.embedder_pack["gguf_bytes"],
	"gguf_source_path": self.embedder_pack.get("gguf_source_path", ""),
	}
	raise RuntimeError("Embedder pack is missing")

	def _embed_raw(self, texts: Union[str, Sequence[str]], task: str = "retrieval.passage") -> np.ndarray:
	if isinstance(texts, str):
	texts = [texts]
	if not texts:
	return np.empty((0, self.embed_dim), dtype=np.float32)

	if hasattr(self.embedder, "create_embedding"):
	resp = self.embedder.create_embedding(list(texts))
	if isinstance(resp, dict) and "data" in resp:
	embs = [row["embedding"] for row in resp["data"]]
	else:
	embs = resp
	elif hasattr(self.embedder, "embed"):
	resp = self.embedder.embed(list(texts))
	if isinstance(resp, dict) and "data" in resp:
	embs = [row["embedding"] for row in resp["data"]]
	else:
	embs = resp
	else:
	raise RuntimeError("Loaded GGUF embedder does not expose create_embedding() or embed()")

	embs = np.asarray(embs, dtype=np.float32)
	if embs.ndim == 1:
	embs = embs.reshape(1, -1)
	return embs

	def embed(self, texts: Union[str, Sequence[str]], task: str = "retrieval.passage") -> np.ndarray:
	    """Public embedding entry point; forwards to ``_embed_raw`` with the same ``task``."""
	    vectors = self._embed_raw(texts, task=task)
	    return vectors

	def embed_query(self, texts: Union[str, Sequence[str]]) -> np.ndarray:
	    """Embed ``texts`` with the task label ``"retrieval.query"``."""
	    task_label = "retrieval.query"
	    return self.embed(texts, task=task_label)

	def embed_passage(self, texts: Union[str, Sequence[str]]) -> np.ndarray:
	    """Embed ``texts`` with the task label ``"retrieval.passage"``."""
	    task_label = "retrieval.passage"
	    return self.embed(texts, task=task_label)

	def embed_classification(self, texts: Union[str, Sequence[str]]) -> np.ndarray:
	    """Embed ``texts`` with the task label ``"classification"``."""
	    task_label = "classification"
	    return self.embed(texts, task=task_label)

	def embed_matching(self, texts: Union[str, Sequence[str]]) -> np.ndarray:
	    """Embed ``texts`` with the task label ``"text-matching"``."""
	    task_label = "text-matching"
	    return self.embed(texts, task=task_label)

	def _snapshot_store(self) -> Dict[str, Any]:
	return {name: tree.snapshot() for name, tree in self._store.items()}

	def _restore_store(self, packed: Mapping[str, Any]):
	    """Restore every tree in ``_store`` from the matching entry of ``packed``.

	    Entries may be ``PackedTreeSnapshot`` objects or dicts of their
	    fields; missing or otherwise-typed entries leave the tree untouched.
	    """
	    for tree_name, tree in self._store.items():
	        entry = packed.get(tree_name)
	        if isinstance(entry, PackedTreeSnapshot):
	            snapshot = entry
	        elif isinstance(entry, dict):
	            snapshot = PackedTreeSnapshot(**entry)
	        else:
	            continue
	        tree.restore(snapshot)

	@staticmethod
	def normalize_for_hash(obj: Any) -> Any:
	if isinstance(obj, dict):
	return {k: GATOR.normalize_for_hash(v) for k, v in obj.items()}
	if isinstance(obj, list):
	return [GATOR.normalize_for_hash(v) for v in obj]
	if hasattr(obj, "item"):
	try:
	return obj.item()
	except Exception:
	return obj
	return obj

	def get_location_string(self):
	    """Return the location string reported by ``self.desktop``."""
	    desktop = self.desktop
	    return desktop.get_location_string()

	def get_time_string(self):
	    """Return the time string reported by ``self.desktop``."""
	    desktop = self.desktop
	    return desktop.get_time_string()

	def _lm_head(self, prompt: str, mode: str = "decision") -> Dict[str, Any]:
	if self.lm is None:
	raise RuntimeError("LM.pt is not loaded")
	system_prompt = (
	"You are GATOR HeadExpert. Decide whether the user request needs retrieval, a tool, or a direct answer. "
	"Return strict JSON only."
	)
	tool_prompt = {
	"input_text": prompt,
	"available_commands": list(self.command_registry.commands.values()),
	"system_goal": system_prompt,
	"mode": mode,
	}
	if hasattr(self.lm, "head_expert"):
	raw = self.lm.head_expert(_safe_json_dumps(tool_prompt))
	else:
	raw = self.lm.head(prompt)
	return self._parse_json_object(raw, default={"actions": []})

	def _lm_tool(self, query: str, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
	if self.lm is None:
	raise RuntimeError("LM.pt is not loaded")
	if hasattr(self.lm, "tool_expert"):
	raw = self.lm.tool_expert(query, tools=tools)
	else:
	raw = self.lm.tool(query, tools=tools)
	return self._parse_json_object(raw, default={"tool_calls": []})

	@staticmethod
	def _parse_json_object(text: str, default: Any = None) -> Any:
	if not text:
	return default
	text = text.strip()
	try:
	return json.loads(text)
	except Exception:
	m = re.search(r"(\{.*})", text, re.DOTALL)
	if m:
	try:
	return json.loads(m.group(1))
	except Exception:
	return default
	return default

	def _decision_to_actions(self, decision: Mapping[str, Any]) -> List[ActionDecision]:
	    """Convert a parsed head decision into ``ActionDecision`` objects.

	    If *decision* is a mapping with a list under ``"actions"``, each mapping
	    in that list becomes one action. If there is no such list, the mapping
	    itself is treated as a single action. Non-mapping input yields ``[]``
	    and non-mapping list items are skipped.

	    LM output is loosely typed, so two fields are coerced defensively:
	    ``memory_access`` given as a bare string becomes a one-element list
	    (``list("KnowledgeTree")`` would split it into characters), and
	    ``parameters`` that cannot be turned into a dict becomes ``{}``.
	    """
	    is_mapping = isinstance(decision, Mapping)
	    actions = decision.get("actions") if is_mapping else None
	    if not isinstance(actions, list):
	        actions = [decision] if is_mapping else []

	    parsed: List[ActionDecision] = []
	    for item in actions:
	        if not isinstance(item, Mapping):
	            continue

	        raw_access = item.get("memory_access") or []
	        if isinstance(raw_access, str):
	            memory_access = [raw_access]
	        elif isinstance(raw_access, (list, tuple, set, frozenset)):
	            memory_access = list(raw_access)
	        else:
	            memory_access = []

	        try:
	            parameters = dict(item.get("parameters") or {})
	        except (TypeError, ValueError):
	            parameters = {}

	        parsed.append(ActionDecision(
	            type=str(item.get("type", "None")),
	            command_name=item.get("command_name"),
	            command_description=item.get("command_description"),
	            command_text=item.get("command_text"),
	            memory_access=memory_access,
	            source=item.get("source"),
	            profile=item.get("profile"),
	            query=item.get("query"),
	            sufficient_to_answer=bool(item.get("sufficient_to_answer", False)),
	            parameters=parameters,
	        ))
	    return parsed

	def store_knowledge(self, documents: Sequence[str], tags: Optional[Sequence[str]] = None, source: str = "user", importance: float = 0.5):
	tags = list(tags) if tags is not None else ["knowledge"] * len(documents)
	if len(tags) != len(documents):
	raise ValueError("Length of tags must match length of documents")
	for doc, tag in zip(documents, tags):
	meta = {"tag": tag, "source": source, "importance": float(importance), "usage_count": 0, "last_used": None}
	self._store["knowledge"].add(doc, meta)

	def search_knowledge(self, query: str, top_k: int = 5, hybrid: bool = True, min_score: float = 0.0):
	return self._store["knowledge"].search(query, top_k=top_k, hybrid=hybrid, min_score=min_score, use_clusters=True)

	def process_knowledge(self, query, history="", location="", time_date=""):
	if not query:
	return []
	try:
	results = self._store["knowledge"].retrieve_by_semantics(query=query, num_clusters=3, top_k_per_cluster=3, min_score=0.0)
	except Exception:
	results = self._store["knowledge"].search(query=query, top_k=9, hybrid=True)
	return results

	def store_conversation_leaf(self, text: str, conv_id: str, leaf_type: str = "input"):
	meta = {"id": conv_id, "type": "branch", "leaf_type": leaf_type}
	self._store["conversation"].add(text, meta)

	def process_conversation(self, query, id, type="input"):
	if type == "input":
	relevant_context = self.conversation_tree.add_input_leaf(query, id)
	return relevant_context
	else:
	self.conversation_tree.add_output_leaf(query, id)

	def store_profile_leaf(self, profile_id: str, text: str, importance: float = 0.5, profile_type: str = "user"):
	target = "profile_user" if profile_type == "user" else "profile_bot"
	meta = {"profile_id": profile_id, "importance": float(importance), "source": profile_type}
	self._store[target].add(text, meta)

	def search_profile_leaves(self, profile_id: str, query: str, profile_type: str = "user", top_k: int = 3, min_score: float = 0.0):
	target = "profile_user" if profile_type == "user" else "profile_bot"
	tree = self._store[target]
	results = tree.search(query, top_k=max(top_k * 3, top_k), min_score=min_score, hybrid=True, use_clusters=True)
	filtered = [r for r in results if r.get("metadata", {}).get("profile_id") == profile_id]
	return filtered[:top_k]

	def store_command(
	self,
	command_name: str,
	phrase: str,
	description: str,
	command_type: str = "custom",
	actions: Optional[List[Dict[str, Any]]] = None
	):
	meta = {
	"command_name": command_name,
	"phrase": phrase,
	"description": description,
	"type": command_type,
	"actions": actions or [],
	}

	self.command_registry.custom_commands[command_name] = meta
	self.command_registry.commands[command_name] = meta

	self._store["commands"].add(
	f"Command: {phrase}",
	{
	"command_name": command_name,
	"phrase": phrase,
	"description": description,
	"type": command_type,
	},
	item_id=f"command::{command_type}::{command_name}",
	)
	def search_commands(self, query: str, top_k: int = 3):
	return self._store["commands"].search(query, top_k=top_k, hybrid=True)

	def execute_retrieval_action(self, action, user_id, bot_id, history="", location="", time_date=""):
	source = action.get("source")
	profile = action.get("profile")
	query = action.get("query")
	if not query:
	return None
	if source == "KnowledgeTree":
	return self.process_knowledge(query=query, history=history, location=location, time_date=time_date)
	if source == "ProfileTree":
	if profile == "user_profile":
	return self.search_profile_leaves(user_id, query, profile_type="user")
	if profile == "bot_profile":
	return self.search_profile_leaves(bot_id, query, profile_type="bot")
	return None

	def _merge_retrieval_actions(self, actions: List[ActionDecision]) -> Dict[str, List[Dict[str, Any]]]:
	retrieval_actions = [a for a in actions if a.type == "retrieval"]
	if not retrieval_actions:
	return {"KnowledgeTree": [], "ProfileTree": []}
	out = {"KnowledgeTree": [], "ProfileTree": []}
	for a in retrieval_actions:
	if a.source == "KnowledgeTree" or (a.memory_access and "KnowledgeTree" in a.memory_access):
	out["KnowledgeTree"].append(asdict(a))
	if a.source == "ProfileTree" or (a.memory_access and "ProfileTree" in a.memory_access):
	out["ProfileTree"].append(asdict(a))
	return out

	def process_actions(self, query, user_id, bot_id, history="", location="", time_date=""):
	    """Route *query*: ask the head expert for a plan, then run retrievals and commands.

	    Steps:
	      1. Search the command index for up to three commands relevant to *query*.
	      2. Ask the head expert (:meth:`_lm_head`) for a decision and turn it
	         into ``ActionDecision`` objects; the raw decision is kept in
	         ``self._last_plan`` when it is a dict.
	      3. Execute every ``"retrieval"`` action not flagged
	         ``sufficient_to_answer`` and collect results per source.
	      4. If any ``"command"`` action exists, ask the tool expert
	         (:meth:`_lm_tool`) for tool calls and run those that match one of
	         the relevant commands.
	      5. Always run shortcut matching on the raw query.
	      6. De-duplicate retrieved items per source (order preserved).

	    Returns:
	        ``{"commands": <space-joined command results or None>,
	        "retrieved_data": {source: [items]}}``. The same data plus the head
	        plan is stored in ``self._last_route``.
	    """
	    relevant_commands = self.search_commands(query, top_k=3)
	    available_actions = []
	    for c in relevant_commands:
	        meta = c["metadata"]
	        # Only fall back to the hit id when no command name is present, so a
	        # hit without an "id" key does not raise.
	        name = meta["command_name"] if "command_name" in meta else c.get("id")
	        available_actions.append({
	            "name": name,
	            "description": meta.get("description", ""),
	            "type": meta.get("type", "command"),
	        })

	    decision_prompt = {
	        "query": query,
	        "available_commands": available_actions,
	        "history": history,
	        "location": location,
	        "time_date": time_date,
	        "user_id": user_id,
	        "bot_id": bot_id,
	        "route_goal": "Use HeadExpert to decide whether retrieval is needed. If a tool/command is needed, choose it with ToolExpert.",
	    }

	    head_decision = self._lm_head(_safe_json_dumps(decision_prompt), mode="decision")
	    actions = self._decision_to_actions(head_decision)
	    self._last_plan = head_decision if isinstance(head_decision, dict) else {}

	    command_needed = any(a.type == "command" for a in actions)
	    retrieval_needed = any(a.type == "retrieval" for a in actions)

	    if not actions:
	        actions = [ActionDecision(type="None", sufficient_to_answer=True)]

	    matched_commands: List[Any] = []
	    retrieved_data: Dict[str, List[Any]] = {}

	    if retrieval_needed:
	        for action in actions:
	            if action.type != "retrieval" or action.sufficient_to_answer:
	                continue
	            result = self.execute_retrieval_action(asdict(action), user_id, bot_id, history=history, location=location, time_date=time_date)
	            if result is None:
	                continue
	            bucket = retrieved_data.setdefault(action.source or "KnowledgeTree", [])
	            if isinstance(result, list):
	                bucket.extend(result)
	            else:
	                bucket.append(result)

	    if command_needed:
	        tool_response = self._lm_tool(query, tools=available_actions)
	        # The tool expert's reply is model output: it may parse to a list,
	        # or carry a null/non-list "tool_calls". Treat those as "no calls".
	        tool_calls = tool_response.get("tool_calls") if isinstance(tool_response, Mapping) else None
	        if not isinstance(tool_calls, (list, tuple)):
	            tool_calls = []
	        for call in tool_calls:
	            if not isinstance(call, dict):
	                continue
	            command_name = call.get("name") or call.get("command_name")
	            parameters = call.get("arguments") or call.get("parameters") or {}
	            cmd_meta = next((cmd for cmd in relevant_commands if cmd["metadata"].get("command_name") == command_name), None)
	            if cmd_meta:
	                command_type = cmd_meta["metadata"].get("type", "custom")
	                matched = self.command_registry.process_commands(command_name, command_type, parameters)
	                matched_commands.extend(matched or [])

	    shortcut_matches = self.command_registry.process_commands(query, "shortcut")
	    matched_commands.extend(shortcut_matches or [])

	    final_commands = " ".join([str(m) for m in matched_commands]) if matched_commands else None

	    for key, values in retrieved_data.items():
	        seen = set()
	        deduped = []
	        for v in values:
	            try:
	                fingerprint = _safe_json_dumps(self.normalize_for_hash(v))
	            except Exception:
	                # Not JSON-serialisable: fall back to the string form.
	                fingerprint = str(v)
	            if fingerprint not in seen:
	                seen.add(fingerprint)
	                deduped.append(v)
	        retrieved_data[key] = deduped

	    self._last_route = {"commands": final_commands, "retrieved_data": retrieved_data, "head_plan": head_decision}
	    return {"commands": final_commands, "retrieved_data": retrieved_data}

	def warmup(self):
	try:
	_ = self.embed(["__gator_warmup__"], task="retrieval.passage")
	except Exception:
	pass

	def export_snapshot_bytes(self) -> bytes:
	payload = {
	"version": self.STATE_VERSION,
	"config": self.config,
	"embedder_pack": self._snapshot_embedder_pack(),
	"stores": self._snapshot_store(),
	"command_phrases": self._command_phrases,
	"custom_commands": self.command_registry.custom_commands,
	"commands": self.command_registry.commands,
	"last_route": self._last_route,
	"last_response": self._last_response,
	"last_plan": self._last_plan,
	}
	buf = io.BytesIO()
	torch.save(payload, buf)
	return lzma.compress(buf.getvalue(), preset=9)

	def import_snapshot_bytes(self, blob: bytes):
	payload = torch.load(io.BytesIO(lzma.decompress(blob)), map_location="cpu", weights_only=False)
	self._restore_store(payload.get("stores", {}))
	self.command_registry.custom_commands = payload.get("custom_commands", {}) or {}
	self.command_registry.commands = payload.get("commands", {}) or {}
	self._command_phrases = payload.get("command_phrases", {}) or {}
	self._last_route = payload.get("last_route", {}) or {}
	self._last_response = payload.get("last_response", "") or ""
	self._last_plan = payload.get("last_plan", {}) or {}
	if payload.get("embedder_pack"):
	self.embedder_pack = payload["embedder_pack"]
	self.embedder, self.embedder_tokenizer, self.embedder_pack = self._restore_embedder_from_pack(self.embedder_pack)
	self.embedder_local_dir = self.embedder_pack.get("local_dir", self.embedder_local_dir)

	def save_checkpoint(self, path: str = "GATOR.pt") -> None:
	    """Warm up the embedder, then write the full state to *path* with ``torch.save``.

	    The payload has the same keys as the one built by
	    :meth:`export_snapshot_bytes`; it is written uncompressed with pickle
	    protocol 5.
	    """
	    self.warmup()
	    registry = self.command_registry
	    payload = dict(
	        version=self.STATE_VERSION,
	        config=self.config,
	        embedder_pack=self._snapshot_embedder_pack(),
	        stores=self._snapshot_store(),
	        custom_commands=registry.custom_commands,
	        commands=registry.commands,
	        command_phrases=self._command_phrases,
	        last_route=self._last_route,
	        last_response=self._last_response,
	        last_plan=self._last_plan,
	    )
	    torch.save(payload, path, pickle_protocol=5)

	@classmethod
	def load_checkpoint(cls, path: str = "GATOR.pt") -> "GATOR":
	    """Build a GATOR instance from a checkpoint written by :meth:`save_checkpoint`.

	    NOTE: the file is unpickled with ``weights_only=False``, which can
	    execute arbitrary code; only load checkpoints from a trusted source.

	    The constructor is called with the saved config (falling back to the
	    defaults below), with ``warm_on_start=False`` and the saved embedder
	    pack. Stores, command registries and the last route/response/plan are
	    then restored from the payload.

	    Raises:
	        FileNotFoundError: if *path* does not exist.
	    """
	    if not os.path.exists(path):
	        raise FileNotFoundError(path)

	    payload = torch.load(path, map_location="cpu", weights_only=False)
	    cfg = payload.get("config", {})

	    obj = cls(
	        lm_checkpoint_path=cfg.get("lm_checkpoint_path", "LM.pt"),
	        embedder_name=cfg.get("embedder_name", "second-state/jina-embeddings-v3-GGUF"),
	        embedder_local_dir=cfg.get("embedder_local_dir", os.path.join("models", "jinaai")),
	        embedder_filename=cfg.get("embedder_filename", GGUF_EMBED_FILENAME),
	        device=cfg.get("device", "cpu"),
	        warm_on_start=False,
	        compression=cfg.get("compression", "lzma"),
	        store_dtype=cfg.get("store_dtype", "float16"),
	        cluster_k=int(cfg.get("cluster_k", 4)),
	        auto_load_lm=bool(cfg.get("auto_load_lm", True)),
	        strict_lm=bool(cfg.get("strict_lm", True)),
	        embedder_pack=payload.get("embedder_pack"),
	    )

	    obj._restore_store(payload.get("stores", {}))
	    obj.command_registry.custom_commands = payload.get("custom_commands", {}) or {}
	    obj.command_registry.commands = payload.get("commands", {}) or {}
	    obj._command_phrases = payload.get("command_phrases", {}) or {}
	    obj._last_route = payload.get("last_route", {}) or {}
	    obj._last_response = payload.get("last_response", "") or ""
	    obj._last_plan = payload.get("last_plan", {}) or {}

	    pack = payload.get("embedder_pack")
	    if pack:
	        obj.embedder_pack = pack
	        restored = obj._restore_embedder_from_pack(pack)
	        # import_snapshot_bytes unpacks this call as
	        # (embedder, tokenizer, pack); assigning the whole tuple to
	        # ``embedder`` would leave an unusable embedder. Accept both the
	        # tuple form and a bare embedder object.
	        if isinstance(restored, tuple) and len(restored) == 3:
	            obj.embedder, obj.embedder_tokenizer, obj.embedder_pack = restored
	            if isinstance(obj.embedder_pack, Mapping):
	                obj.embedder_local_dir = obj.embedder_pack.get(
	                    "local_dir", getattr(obj, "embedder_local_dir", None)
	                )
	        else:
	            obj.embedder = restored

	    return obj

	def infer_actions(self, prompt: str) -> Dict[str, Any]:
	return self._lm_head(prompt, mode="decision")

	def infer_command(self, prompt_payload: Dict[str, Any]) -> Dict[str, Any]:
	tools = prompt_payload.get("available_commands", [])
	query = prompt_payload.get("input_text", "")
	return self._lm_tool(query, tools=tools)

	def respond(self, query: str, user_id: str, bot_id: str, history: str = "", location: str = "", time_date: str = ""):
	routed = self.process_actions(query, user_id=user_id, bot_id=bot_id, history=history, location=location, time_date=time_date)
	return routed

	def process_command(self, args, *kwargs):
	return self.process_actions(args, *kwargs)

	def summary(self) -> Dict[str, Any]:
	return {
	"config": dict(self.config),
	"stores": {name: {"count": len(tree.docs), "dim": int(tree.embs.shape[1]) if tree.embs.size else self.embed_dim} for name, tree in self._store.items()},
	"commands": len(self.command_registry.commands),
	"custom_commands": len(self.command_registry.custom_commands),
	"last_route": self._last_route,
	"last_plan": self._last_plan,
	}

	def run_self_test(self) -> Dict[str, Any]:
	report = {}
	try:
	report["embed"] = tuple(self.embed(["hello world"], task="retrieval.passage").shape)
	except Exception as e:
	report["embed_error"] = repr(e)

	try:
	self.store_knowledge(["The capital of France is Paris."], tags=["fact"], source="test", importance=1.0)
	report["knowledge"] = self.search_knowledge("What is the capital of France?", top_k=1)
	except Exception as e:
	report["knowledge_error"] = repr(e)

	try:
	self.store_profile_leaf("user-1", "Likes Japanese food.", importance=0.8, profile_type="user")
	report["profile"] = self.search_profile_leaves("user-1", "food preference", profile_type="user", top_k=1)
	except Exception as e:
	report["profile_error"] = repr(e)

	try:
	self.store_command("test_cmd", "test command", "A small test command", command_type="custom", actions=[])
	report["commands"] = self.search_commands("test command", top_k=1)
	except Exception as e:
	report["commands_error"] = repr(e)

	return {"report": report, "summary": self.summary()}

	@property
	def profile_tree(self):
	return types.SimpleNamespace(search_leaves=lambda profile_id, query, top_k=3, min_score=0.0, use_clusters=True: self.search_profile_leaves(profile_id, query, profile_type="user", top_k=top_k, min_score=min_score))

	@property
	def conversation_tree(self):
	return types.SimpleNamespace(
	add_input_leaf=lambda text, id: self._store["conversation"].search(text, top_k=3, hybrid=True, use_clusters=True),
	add_output_leaf=lambda text, id: self.store_conversation_leaf(text, id, leaf_type="output"),
	)

	@property
	def knowledge_tree(self):
	return types.SimpleNamespace(
	retrieve_by_semantics=lambda query, num_clusters=2, top_k_per_cluster=3, min_score=0.0: self._store["knowledge"].retrieve_by_semantics(query, num_clusters=num_clusters, top_k_per_cluster=top_k_per_cluster, min_score=min_score),
	search=lambda query, top_k=5, hybrid=True: self._store["knowledge"].search(query, top_k=top_k, hybrid=hybrid),
	_embed_text=lambda text: self.embed(text, task="retrieval.passage")[0],
	_warm_collection=lambda: self.warmup(),
	)

	@property
	def command_tree(self):
	return types.SimpleNamespace(
	search_relevant_commands=lambda query, top_k=3: self.search_commands(query, top_k=top_k),
	add_command_branch=lambda command_name, command_action, command_type, description: self.store_command(command_action, command_name, description, command_type=command_type),
	update_command_description=lambda command_name, new_description: self.command_registry.update_command(command_name, description=new_description),
	)

	def save(self, path: str = "GATOR.pt"):
	return self.save_checkpoint(path)

	def __repr__(self) -> str:
	return f"GATOR(embedder={self.embedder_name!r}, local_dir={self.embedder_local_dir!r}, stores={list(self._store.keys())}, commands={len(self.command_registry.commands)})"

	class MemoryBank:
	    """
	    Thin owner/wrapper around GATOR.

	    - Loads an existing GATOR.pt if present.
	    - Builds a new one if missing.
	    - Uses AppData/LocalAppData as the default location when no path is passed.
	    - Delegates all unknown attributes/methods directly to the underlying GATOR instance.

	    The checkpoint always lives at ``<root_dir>/GATOR.pt``: when a ``.pt``
	    path is passed as ``gator_location`` only its parent directory is used.
	    """

	    DEFAULT_APP_FOLDER = "PackedLLM"
	    DEFAULT_BUNDLE_NAME = "GATOR.pt"

	    def __init__(
	        self,
	        gator_location: Optional[Union[str, os.PathLike]] = None,
	        *,
	        build_if_missing: bool = True,
	        **gator_kwargs: Any,
	    ):
	        """Load the checkpoint under *gator_location*, or build and save a new GATOR.

	        Args:
	            gator_location: Directory (or ``.pt`` file whose parent directory
	                is used). ``None`` selects the default app-data folder.
	            build_if_missing: When false and no checkpoint exists, raise
	                instead of building one.
	            **gator_kwargs: Passed to ``GATOR(...)`` when building.

	        Raises:
	            FileNotFoundError: no checkpoint and ``build_if_missing`` is false.
	        """
	        self.root_dir = self._resolve_root_dir(gator_location)
	        self.root_dir.mkdir(parents=True, exist_ok=True)

	        self.checkpoint_path = self._resolve_checkpoint_path(self.root_dir)

	        if self.checkpoint_path.exists():
	            self.gator = GATOR.load_checkpoint(str(self.checkpoint_path))
	        else:
	            if not build_if_missing:
	                raise FileNotFoundError(f"No GATOR checkpoint found at: {self.checkpoint_path}")

	            self.gator = GATOR(**gator_kwargs)
	            self.gator.save_checkpoint(str(self.checkpoint_path))

	    @classmethod
	    def _default_appdata_dir(cls) -> Path:
	        """Return ``<base>/PackedLLM`` where base is LOCALAPPDATA, APPDATA or ``~/AppData/Local``."""
	        local_appdata = os.getenv("LOCALAPPDATA")
	        appdata = os.getenv("APPDATA")

	        if local_appdata:
	            base = Path(local_appdata)
	        elif appdata:
	            base = Path(appdata)
	        else:
	            base = Path.home() / "AppData" / "Local"

	        return base / cls.DEFAULT_APP_FOLDER

	    @classmethod
	    def _resolve_root_dir(cls, gator_location: Optional[Union[str, os.PathLike]]) -> Path:
	        """Resolve the directory that holds the checkpoint.

	        ``None`` gives the default app-data dir; a path ending in ``.pt``
	        gives its parent; any other path is used as the directory itself.
	        """
	        if gator_location is None:
	            return cls._default_appdata_dir()
	        path = Path(gator_location).expanduser().resolve()
	        if path.suffix.lower() == ".pt":
	            return path.parent

	        return path

	    @classmethod
	    def _resolve_checkpoint_path(cls, root_dir: Path) -> Path:
	        """Return ``root_dir / DEFAULT_BUNDLE_NAME``."""
	        return root_dir / cls.DEFAULT_BUNDLE_NAME

	    def save(self) -> str:
	        """Write the current GATOR state to the checkpoint and return its path."""
	        self.root_dir.mkdir(parents=True, exist_ok=True)
	        self.gator.save_checkpoint(str(self.checkpoint_path))
	        return str(self.checkpoint_path)

	    def reload(self) -> None:
	        """Replace ``self.gator`` with a fresh load of the checkpoint.

	        Raises:
	            FileNotFoundError: if the checkpoint file does not exist.
	        """
	        if not self.checkpoint_path.exists():
	            raise FileNotFoundError(str(self.checkpoint_path))
	        self.gator = GATOR.load_checkpoint(str(self.checkpoint_path))

	    def rebuild(self, **gator_kwargs: Any) -> None:
	        """Replace ``self.gator`` with a newly built GATOR and save it."""
	        self.gator = GATOR(**gator_kwargs)
	        self.gator.save_checkpoint(str(self.checkpoint_path))

	    def __getattr__(self, name: str) -> Any:
	        """Delegate attributes not found on the wrapper to the wrapped GATOR.

	        ``__getattr__`` only runs after normal lookup fails. Reading
	        ``self.gator`` here while ``gator`` is not set yet (failed or
	        bypassed ``__init__``, copy/unpickle) would re-enter this method
	        forever, so the instance dict is consulted directly and a plain
	        ``AttributeError`` is raised instead.
	        """
	        try:
	            gator = self.__dict__["gator"]
	        except KeyError:
	            raise AttributeError(
	                f"{type(self).__name__!r} object has no attribute {name!r}"
	            ) from None
	        return getattr(gator, name)

	    def __dir__(self):
	        """List the wrapper's own names plus those of the wrapped GATOR."""
	        base = set(super().__dir__())
	        try:
	            base.update(dir(self.gator))
	        except Exception:
	            # Best effort: the wrapper's own names are still useful.
	            pass
	        return sorted(base)

	    def __repr__(self) -> str:
	        return f"MemoryBank(root_dir={str(self.root_dir)!r}, checkpoint_path={str(self.checkpoint_path)!r})"

	class HardwareProbe:
	    """Static probes for CPU, GPU (nvidia-smi / torch) and WebGPU capacity."""

	    @staticmethod
	    def cpu() -> Dict[str, Any]:
	        """Return physical/logical core counts and available/total RAM in GiB."""
	        physical = psutil.cpu_count(logical=False) or psutil.cpu_count(logical=True) or 1
	        logical = psutil.cpu_count(logical=True) or physical
	        vm = psutil.virtual_memory()
	        return {
	            "physical_cores": physical,
	            "logical_cores": logical,
	            "available_ram_gb": vm.available / (1024 ** 3),
	            "total_ram_gb": vm.total / (1024 ** 3),
	        }

	    @staticmethod
	    def _parse_nvidia_smi_csv(out: str) -> Optional[Dict[str, Any]]:
	        """Parse ``nvidia-smi`` CSV output (name, total, free, used MiB, util %).

	        nvidia-smi prints one line per GPU; only the first non-empty line is
	        used. The name is split off from the right so a device name
	        containing commas still parses. Returns ``None`` for empty or
	        malformed output.
	        """
	        rows = [row.strip() for row in out.splitlines() if row.strip()]
	        if not rows:
	            return None
	        parts = [p.strip() for p in rows[0].rsplit(",", 4)]
	        if len(parts) != 5:
	            return None
	        try:
	            total_mb, free_mb, used_mb, util = (float(p) for p in parts[1:])
	        except ValueError:
	            return None
	        return {
	            "backend": "cuda",
	            "device_name": parts[0],
	            "total_vram_gb": total_mb / 1024.0,
	            "free_vram_gb": free_mb / 1024.0,
	            "used_vram_gb": used_mb / 1024.0,
	            "utilization_pct": util,
	        }

	    @staticmethod
	    def nvidia_gpu() -> Optional[Dict[str, Any]]:
	        """Query ``nvidia-smi`` for GPU memory/utilization; ``None`` if unavailable."""
	        cmd = [
	            "nvidia-smi",
	            "--query-gpu=name,memory.total,memory.free,memory.used,utilization.gpu",
	            "--format=csv,noheader,nounits",
	        ]
	        try:
	            raw = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
	            return HardwareProbe._parse_nvidia_smi_csv(raw.decode("utf-8", errors="replace"))
	        except Exception:
	            # Missing binary, driver error, etc.: treat as "no NVIDIA GPU".
	            return None

	    @staticmethod
	    def torch_gpu() -> Optional[Dict[str, Any]]:
	        """Probe CUDA, then MPS, through torch; ``None`` when neither is usable.

	        For MPS the system RAM figures from psutil are reported as the VRAM
	        budget.
	        """
	        try:
	            if torch.cuda.is_available():
	                idx = torch.cuda.current_device()
	                free_b, total_b = torch.cuda.mem_get_info(idx)
	                return {
	                    "backend": "cuda",
	                    "device_name": torch.cuda.get_device_name(idx),
	                    "total_vram_gb": total_b / (1024 ** 3),
	                    "free_vram_gb": free_b / (1024 ** 3),
	                    "used_vram_gb": (total_b - free_b) / (1024 ** 3),
	                    "utilization_pct": None,
	                }
	        except Exception:
	            pass

	        try:
	            if getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
	                vm = psutil.virtual_memory()
	                return {
	                    "backend": "metal",
	                    "device_name": "Apple Silicon (MPS)",
	                    "total_vram_gb": vm.total / (1024 ** 3),
	                    "free_vram_gb": vm.available / (1024 ** 3),
	                    "used_vram_gb": (vm.total - vm.available) / (1024 ** 3),
	                    "utilization_pct": None,
	                }
	        except Exception:
	            pass
	        return None

	    @staticmethod
	    def webgpu() -> Optional[Dict[str, Any]]:
	        """Estimate a WebGPU budget from the adapter's max buffer size.

	        The budget is half of the reported max buffer size (1.0 GiB when no
	        limit is reported). Returns ``None`` when wgpu is unavailable or the
	        adapter request fails.
	        """
	        if not _WGPU_AVAILABLE:
	            return None
	        try:
	            request = getattr(wgpu.gpu, "request_adapter_sync", None) or getattr(wgpu.gpu, "request_adapter")
	            adapter = request(power_preference="high-performance")
	            limits = getattr(adapter, "limits", {}) or {}
	            max_buffer_bytes = limits.get("max-buffer-size") or limits.get("maxBufferSize") or 0
	            est_gb = (max_buffer_bytes / (1024 ** 3)) * 0.5 if max_buffer_bytes else 1.0
	            return {
	                "backend": "webgpu",
	                "device_name": getattr(adapter, "summary", "WebGPU adapter"),
	                "total_vram_gb": est_gb,
	                "free_vram_gb": est_gb,
	                "used_vram_gb": 0.0,
	                "utilization_pct": None,
	            }
	        except Exception:
	            return None

	    @classmethod
	    def snapshot(cls) -> Dict[str, Any]:
	        """Return ``{"cpu", "gpu", "webgpu"}``; WebGPU is probed only when no GPU was found."""
	        gpu = cls.nvidia_gpu() or cls.torch_gpu()
	        webgpu = None if gpu is not None else cls.webgpu()
	        return {"cpu": cls.cpu(), "gpu": gpu, "webgpu": webgpu}


	@dataclass
	class RunMetrics:
	    """One recorded expert run, as stored in ``ExpertHandle.metrics_history``."""

	    # Wall-clock duration of the run, in seconds.
	    duration_sec: float
	    # Token count reported for the run.
	    tokens: int
	    # tokens / duration_sec (0.0 when the duration is not positive).
	    tokens_per_sec: float
	    # Name of the prompt template used, if any.
	    template_used: Optional[str]
	    # Offload plan that was active for the expert at record time.
	    offload_plan: Dict[str, Any]
	    # Caller-supplied telemetry deltas for the run.
	    telemetry_deltas: Dict[str, Any]
	    # Creation time of the record (``time.time()``).
	    timestamp: float = field(default_factory=time.time)

	class ExpertHandle(nn.Module):
	    """Bookkeeping wrapper for one expert: its spec, lazily loaded backend and run metrics.

	    ``_llama`` holds the runtime backend. For the expert named
	    ``"TranslationExpert"`` it is a dict with ``tokenizer``/``model``/
	    ``local_dir``; for every other expert it is dropped when pickling.
	    """

	    def __init__(self, name: str, spec: Dict[str, Any]):
	        super().__init__()
	        self.name = name
	        self.spec: Dict[str, Any] = dict(spec)
	        self._llama = None
	        self.last_prompt: str = ""
	        self.last_response: str = ""
	        self.last_template_used: Optional[str] = None
	        self.last_offload_plan: Dict[str, Any] = {}
	        self.last_metrics: Dict[str, Any] = {}
	        self.metrics_history: List[Dict[str, Any]] = []
	        self.call_count: int = 0
	        self.total_inference_sec: float = 0.0
	        self.avg_inference_sec: float = 0.0
	        self.total_tokens_generated: int = 0
	        self.avg_tokens_per_sec: float = 0.0
	        self.load_time_sec: Optional[float] = None

	    def __getstate__(self):
	        """Return picklable state.

	        The translation backend is reduced to ``state_dict`` + ``config`` +
	        tokenizer so it can be rebuilt in ``__setstate__``; any other
	        expert's backend is dropped. A translation backend dict that holds
	        neither a model nor a ``state_dict`` is dropped as well instead of
	        crashing on ``None.state_dict()``.
	        """
	        state = self.__dict__.copy()

	        if self.name == "TranslationExpert" and isinstance(state.get("_llama"), dict):
	            backend = state["_llama"]
	            model = backend.get("model")
	            if model is not None:
	                state["_llama"] = {
	                    "state_dict": model.state_dict(),
	                    "config": model.config,
	                    "tokenizer": backend.get("tokenizer"),
	                    "local_dir": backend.get("local_dir"),
	                }
	            elif "state_dict" not in backend:
	                state["_llama"] = None
	        elif self.name != "TranslationExpert":
	            state["_llama"] = None

	        return state

	    def __setstate__(self, state):
	        """Restore state and rebuild the translation model from its saved ``state_dict``."""
	        self.__dict__.update(state)

	        if self.name == "TranslationExpert" and isinstance(self._llama, dict) and "state_dict" in self._llama:
	            from transformers import MarianMTModel

	            data = self._llama
	            model = MarianMTModel(data["config"])
	            model.load_state_dict(data["state_dict"])

	            self._llama = {
	                "tokenizer": data["tokenizer"],
	                "model": model,
	                "local_dir": data["local_dir"],
	            }

	    def is_loaded(self) -> bool:
	        """Return True when a backend object is attached."""
	        return self._llama is not None

	    def record_run(
	        self,
	        prompt_repr: str,
	        response: str,
	        duration_sec: float,
	        tokens: int,
	        telemetry_deltas: Dict[str, Any],
	        template_used: Optional[str] = None,
	    ) -> None:
	        """Record one inference run and update the running averages.

	        ``metrics_history`` keeps only the 25 most recent entries.
	        """
	        self.last_prompt = prompt_repr
	        self.last_response = response
	        self.last_template_used = template_used
	        self.call_count += 1
	        self.total_inference_sec += duration_sec
	        self.avg_inference_sec = self.total_inference_sec / self.call_count
	        self.total_tokens_generated += tokens
	        tps = (tokens / duration_sec) if duration_sec > 0 else 0.0
	        # Incremental mean of the per-run tokens/sec values.
	        self.avg_tokens_per_sec = ((self.avg_tokens_per_sec * (self.call_count - 1)) + tps) / self.call_count
	        entry = asdict(
	            RunMetrics(
	                duration_sec=duration_sec,
	                tokens=tokens,
	                tokens_per_sec=tps,
	                template_used=template_used,
	                offload_plan=self.last_offload_plan,
	                telemetry_deltas=telemetry_deltas,
	            )
	        )
	        self.last_metrics = entry
	        self.metrics_history.append(entry)
	        if len(self.metrics_history) > 25:
	            self.metrics_history.pop(0)

	    def forward(self, *args, **kwargs):  # pragma: no cover
	        """Always raise: handles are driven by their owner, not called directly.

	        Accepts any arguments so every call style reaches the intended
	        ``RuntimeError`` rather than a signature ``TypeError``.
	        """
	        raise RuntimeError(f"ExpertHandle('{self.name}') is not directly callable.")


	# ================================================================
	# PackedLM
	# ================================================================

	class PackedLM(nn.ModuleDict):
	# Layer count assumed by _plan_offload when an expert spec has no "n_layers".
	DEFAULT_LAYER_GUESS = 32
	# Multiplier applied to the model file size to get the VRAM needed for full GPU offload.
	VRAM_SAFETY_MARGIN = 1.15
	# Same multiplier for the WebGPU path, where the budget is only an estimate.
	WEBGPU_SAFETY_MARGIN = 1.40

	# NOTE(review): presumably the user/assistant turn markers of an R1-style chat
	# template; not referenced in the visible part of this class. Confirm against the users.
	_R1_USER_TOKEN = "<｜User｜>"
	_R1_ASSISTANT_TOKEN = "<｜Assistant｜>"

	# NOTE(review): presumably default task/format instructions for an xLAM-style
	# tool-calling prompt; the consumers are outside the visible code. Confirm.
	_XLAM_TASK_INSTRUCTION_DEFAULT = (
	"Based on the user's query, decide whether a function call is needed and, if so, "
	"produce the correct call(s) using only the tools provided."
	)
	_XLAM_FORMAT_INSTRUCTION_DEFAULT = (
	'Generate a JSON object of the form {"tool_calls": [{"name": "func_name", '
	'"arguments": {"arg1": "value1"}}, ...]}. If no function call is needed, '
	'return {"tool_calls": []}. Output JSON only, nothing else.'
	)

	def __init__(self, bundle_path: Optional[str] = DEFAULT_BUNDLE_PATH, auto_load_bundle: bool = True):
	    """Create an empty expert container and optionally load a bundle.

	    The bundle is loaded only when ``auto_load_bundle`` is true and
	    ``bundle_path`` is non-empty and exists on disk.
	    """
	    super().__init__()
	    self.bundle_path = bundle_path
	    self.bundle: Optional[Dict[str, Any]] = None
	    self.last_expert: str = ""
	    # Cache of temp files holding embedded GGUF assets, keyed by cache name.
	    self._embed_tempfiles: Dict[str, str] = {}
	    self._hf_translation_dir = str(DEFAULT_ZH_EN_DIR)

	    should_load = auto_load_bundle and bundle_path and os.path.exists(bundle_path)
	    if should_load:
	        self.load_bundle(bundle_path)

	def __getstate__(self):
	    """Return a shallow copy of the instance dict for pickling.

	    The live experts are no longer touched here. Setting
	    ``expert._llama = None`` during pickling unloaded every model of the
	    running object as a side effect of merely saving or copying it. Each
	    ``ExpertHandle`` already strips its own runtime backend in its
	    ``__getstate__``, so the pickled state still contains no live model
	    handles.

	    NOTE(review): an entry that is not an ``ExpertHandle`` but carries a
	    ``_llama`` attribute would now be pickled with it; ``load_bundle`` only
	    registers ``ExpertHandle`` objects.
	    """
	    return self.__dict__.copy()

	def load_bundle(self, bundle_path: str) -> "PackedLM":
	    """Load a bundle file and register one ``ExpertHandle`` per model entry.

	    NOTE: the file is unpickled with ``weights_only=False``, which can
	    execute arbitrary code; only load bundles from a trusted source.

	    A ``TranslationExpert`` (zh->en, ``hf_seq2seq``) handle is added when
	    neither ``TranslationExpert`` nor ``zh_en_translator`` is present.
	    Returns ``self``.
	    """
	    self.bundle_path = bundle_path
	    self.bundle = torch.load(bundle_path, map_location="cpu", weights_only=False)

	    for expert_name, expert_spec in self.bundle.get("models", {}).items():
	        self[expert_name] = ExpertHandle(expert_name, expert_spec)

	    # Optional translation module for zh->en support.
	    has_translator = "TranslationExpert" in self or "zh_en_translator" in self
	    if not has_translator:
	        translation_spec = {
	            "kind": "hf_seq2seq",
	            "repo_id": ZH_EN_REPO_ID,
	            "local_dir": self._hf_translation_dir,
	            "source_lang": "zh",
	            "target_lang": "en",
	        }
	        self["TranslationExpert"] = ExpertHandle("TranslationExpert", translation_spec)
	    return self

	def reload_expert(self, expert_name: str) -> "PackedLM":
	    """Drop and re-create the runtime backend of *expert_name*; returns ``self``.

	    ``hf_seq2seq`` experts are rebuilt through
	    ``_load_translation_backend``; all others through ``_get_llama``.

	    Raises:
	        KeyError: if *expert_name* is not a loaded expert.
	    """
	    if expert_name not in self:
	        raise KeyError(f"Unknown expert '{expert_name}'. Loaded experts: {list(self.keys())}")
	    expert = self[expert_name]
	    if expert.spec.get("kind") == "hf_seq2seq":
	        # ``_load_translation_backend`` takes no ``force_reload`` argument,
	        # so passing one raised TypeError. Clearing the cached backend
	        # first forces the loader to rebuild it.
	        # NOTE(review): the loader always targets "TranslationExpert",
	        # whatever *expert_name* is.
	        expert._llama = None
	        self._load_translation_backend()
	        return self
	    expert._llama = None
	    self._get_llama(expert, force_reload=True)
	    return self

	def unload_expert(self, expert_name: str) -> None:
	    """Detach the runtime backend of *expert_name*; unknown names are ignored."""
	    if expert_name not in self:
	        return
	    expert = self[expert_name]
	    if hasattr(expert, "_llama"):
	        expert._llama = None

	def unload_all(self) -> None:
	    """Detach the runtime backend of every registered expert."""
	    # Snapshot the names first so iteration is independent of the container.
	    for expert_name in tuple(self.keys()):
	        self.unload_expert(expert_name)

	def summary(self) -> Dict[str, Any]:
	    """Return per-expert status plus ``"_last_expert"``.

	    Each expert entry reports load state, call count, rounded average
	    inference time and tokens/sec, the last offload plan and a preview of
	    the last response (first 160 characters plus ``"..."`` when longer).
	    """
	    report: Dict[str, Any] = {}
	    for expert_name, expert in self.items():
	        response = expert.last_response
	        if len(response) > 160:
	            preview = response[:160] + "..."
	        else:
	            preview = response
	        report[expert_name] = dict(
	            loaded=expert.is_loaded(),
	            call_count=expert.call_count,
	            avg_inference_sec=round(expert.avg_inference_sec, 4),
	            avg_tokens_per_sec=round(expert.avg_tokens_per_sec, 2),
	            last_offload_plan=expert.last_offload_plan,
	            last_response_preview=preview,
	        )
	    report["_last_expert"] = self.last_expert
	    return report

	@staticmethod
	def _tensor_to_bytes(t: Any) -> bytes:
	    """Return the raw bytes of an embedded asset.

	    Accepts ``bytes`` (returned as-is), ``bytearray``/``memoryview``,
	    torch tensors (moved to CPU and made contiguous first) and NumPy
	    arrays.

	    Raises:
	        TypeError: for any other type.
	    """
	    if isinstance(t, bytes):
	        return t
	    if isinstance(t, (bytearray, memoryview)):
	        return bytes(t)
	    if torch.is_tensor(t):
	        return t.detach().cpu().contiguous().numpy().tobytes()
	    if isinstance(t, np.ndarray):
	        return t.tobytes()
	    raise TypeError(f"Unsupported embedded asset type: {type(t)}")

	def _resolve_model_path(self, expert: ExpertHandle) -> str:
	    """Return a filesystem path to the expert's GGUF model.

	    Resolution order: the spec's ``"path"`` if it exists on disk; a
	    previously materialized temp file for this expert; otherwise the GGUF
	    bytes embedded in the bundle under ``assets.gguf[expert.name]`` are
	    written to a new temp file, which is cached and returned. The temp
	    file is not deleted by this method.

	    Raises:
	        FileNotFoundError: no usable path and no embedded bytes.
	    """
	    spec = expert.spec
	    path = spec.get("path", "")
	    if path and os.path.exists(path):
	        return path

	    assets = (self.bundle or {}).get("assets", {}).get("gguf", {})
	    embedded = assets.get(expert.name)
	    if embedded is None:
	        raise FileNotFoundError(f"Model '{expert.name}' has no external path and no embedded GGUF bytes in the bundle.")

	    cache_key = expert.name
	    cached = self._embed_tempfiles.get(cache_key)
	    if cached is not None and os.path.exists(cached):
	        return cached

	    raw = self._tensor_to_bytes(embedded)
	    tmp = tempfile.NamedTemporaryFile(prefix=f"{expert.name}_", suffix=".gguf", delete=False)
	    try:
	        # The context manager closes the handle even if the write fails.
	        with tmp:
	            tmp.write(raw)
	    except BaseException:
	        # Do not leave a truncated model file behind.
	        with contextlib.suppress(OSError):
	            os.unlink(tmp.name)
	        raise
	    self._embed_tempfiles[cache_key] = tmp.name
	    return tmp.name

	def _resolve_projector_path(self, expert: ExpertHandle) -> Optional[str]:
	    """Return a filesystem path to the expert's vision projector, or ``None``.

	    The projector name is read from the spec keys ``mmproj_path``,
	    ``clip_model_path`` or ``clip_path`` (first non-empty). An existing
	    file is returned directly; otherwise the bundle's ``assets.gguf`` is
	    looked up by that same string and, if found, written to a cached temp
	    file. The temp file is not deleted by this method.
	    """
	    spec = expert.spec
	    path = spec.get("mmproj_path") or spec.get("clip_model_path") or spec.get("clip_path")
	    if not path:
	        return None
	    if os.path.exists(path):
	        return path

	    assets = (self.bundle or {}).get("assets", {}).get("gguf", {})
	    embedded = assets.get(path)
	    if embedded is None:
	        return None

	    cache_key = f"{expert.name}_mmproj"
	    cached = self._embed_tempfiles.get(cache_key)
	    if cached is not None and os.path.exists(cached):
	        return cached

	    raw = self._tensor_to_bytes(embedded)
	    tmp = tempfile.NamedTemporaryFile(prefix=f"{expert.name}_mmproj_", suffix=".gguf", delete=False)
	    try:
	        # The context manager closes the handle even if the write fails.
	        with tmp:
	            tmp.write(raw)
	    except BaseException:
	        # Do not leave a truncated projector file behind.
	        with contextlib.suppress(OSError):
	            os.unlink(tmp.name)
	        raise
	    self._embed_tempfiles[cache_key] = tmp.name
	    return tmp.name

	def _plan_offload(self, expert: ExpertHandle) -> Dict[str, Any]:
	    """Decide backend, GPU layer count and thread count for loading *expert*.

	    The model size is the GGUF file size (or the spec's ``approx_size_gb``,
	    default 2.0, when the file cannot be resolved). With a GPU or WebGPU
	    adapter, the model is fully offloaded (``n_gpu_layers = -1``) when the
	    free budget covers size x safety margin; otherwise a proportional
	    number of layers is offloaded. Without either, the plan is CPU-only.

	    Returns:
	        Dict with ``backend``, ``n_gpu_layers``, ``n_threads``,
	        ``model_size_gb``, ``hardware_snapshot`` and ``rationale``.
	    """
	    hw = HardwareProbe.snapshot()
	    spec = expert.spec
	    try:
	        model_path = self._resolve_model_path(expert)
	        file_size_gb = os.path.getsize(model_path) / (1024 ** 3)
	    except Exception:
	        # Model not resolvable yet: fall back to the declared estimate.
	        file_size_gb = float(spec.get("approx_size_gb", 2.0))

	    n_layers = int(spec.get("n_layers", self.DEFAULT_LAYER_GUESS))
	    reasoning: List[str] = []

	    if hw["gpu"] is not None:
	        gpu = hw["gpu"]
	        needed = file_size_gb * self.VRAM_SAFETY_MARGIN
	        if gpu["free_vram_gb"] >= needed:
	            n_gpu_layers = -1
	            reasoning.append(f"{gpu['backend']} GPU '{gpu['device_name']}' has {gpu['free_vram_gb']:.2f}GB free >= {needed:.2f}GB needed -> full offload")
	        else:
	            frac = max(0.0, gpu["free_vram_gb"] / needed) if needed > 0 else 0.0
	            n_gpu_layers = max(0, int(frac * n_layers))
	            reasoning.append(f"{gpu['backend']} GPU has only {gpu['free_vram_gb']:.2f}GB free of {needed:.2f}GB needed -> partial offload of {n_gpu_layers}/{n_layers} layers")
	        backend = gpu["backend"]
	    elif hw["webgpu"] is not None:
	        webgpu = hw["webgpu"]
	        needed = file_size_gb * self.WEBGPU_SAFETY_MARGIN
	        if webgpu["free_vram_gb"] >= needed:
	            n_gpu_layers = -1
	            reasoning.append("WebGPU adapter's estimated budget covers the full model -> full offload")
	        else:
	            frac = max(0.0, webgpu["free_vram_gb"] / needed) if needed > 0 else 0.0
	            n_gpu_layers = max(0, int(frac * n_layers))
	            reasoning.append(f"WebGPU adapter budget covers ~{frac * 100:.0f}% of the model -> partial offload of {n_gpu_layers}/{n_layers} layers")
	        backend = "webgpu"
	    else:
	        n_gpu_layers = 0
	        backend = "cpu"
	        reasoning.append("No CUDA/Metal GPU or WebGPU adapter detected -> CPU-only")

	    # Leave one physical core free for the rest of the process.
	    n_threads = max(1, hw["cpu"]["physical_cores"] - 1)
	    return {
	        "backend": backend,
	        "n_gpu_layers": n_gpu_layers,
	        "n_threads": n_threads,
	        "model_size_gb": round(file_size_gb, 3),
	        "hardware_snapshot": hw,
	        # Plain pipe separator: "\|" is an invalid escape sequence.
	        "rationale": " | ".join(reasoning),
	    }

	def _get_llama(self, expert: ExpertHandle, force_reload: bool = False):
	    """Return the expert's ``Llama`` instance, loading it when needed.

	    A loaded backend is reused unless ``force_reload`` is set. Loading
	    resolves the model path, plans the offload, attaches a
	    ``Qwen25VLChatHandler`` for vision experts that have a projector, and
	    records load time and offload plan on the expert.

	    Raises:
	        RuntimeError: if llama-cpp-python is not available.
	    """
	    already_loaded = expert.is_loaded()
	    if already_loaded and not force_reload:
	        return expert._llama
	    if not _LLAMA_CPP_AVAILABLE:
	        raise RuntimeError("llama-cpp-python is not installed.")

	    model_path = self._resolve_model_path(expert)
	    plan = self._plan_offload(expert)

	    chat_handler = None
	    if expert.spec.get("vision", False):
	        projector_path = self._resolve_projector_path(expert)
	        if projector_path:
	            from llama_cpp.llama_chat_format import Qwen25VLChatHandler
	            chat_handler = Qwen25VLChatHandler(clip_model_path=projector_path)

	    llama_options = dict(
	        model_path=model_path,
	        n_ctx=expert.spec.get("ctx", 8192),
	        n_threads=plan["n_threads"],
	        n_gpu_layers=plan["n_gpu_layers"],
	        chat_handler=chat_handler,
	        verbose=False,
	    )
	    started = time.perf_counter()
	    llama = Llama(**llama_options)
	    expert.load_time_sec = time.perf_counter() - started

	    expert.last_offload_plan = plan
	    expert._llama = llama
	    return llama

	def _load_translation_backend(self, force_reload: bool = False):
	    """Return the TranslationExpert backend dict, hydrating it from the bundle if needed.

	    Args:
	        force_reload: When true, ignore a cached backend and rebuild it
	            from the bundle assets. Defaults to false. ``reload_expert``
	            passes this keyword, which the previous zero-argument
	            signature rejected with ``TypeError``.

	    Returns:
	        Dict with ``tokenizer``, ``model`` and ``local_dir``.

	    Raises:
	        RuntimeError: if there is no TranslationExpert, or the bundle has no
	            ``assets.translation`` entry.
	    """
	    if "TranslationExpert" not in self:
	        raise RuntimeError("TranslationExpert not found.")

	    expert = self["TranslationExpert"]

	    cached = expert._llama
	    if not force_reload and isinstance(cached, dict) and "model" in cached:
	        return cached

	    if self.bundle and "assets" in self.bundle and "translation" in self.bundle["assets"]:
	        print("Hydrating TranslationExpert from embedded bundle assets...")
	        from transformers import MarianMTModel, MarianTokenizer, MarianConfig

	        asset_data = self.bundle["assets"]["translation"]

	        # The tokenizer can only be loaded from files, so the embedded
	        # files are written to a throwaway directory first.
	        tok_tmp = Path(tempfile.mkdtemp(prefix="zh_en_tok_"))
	        try:
	            for filename, filebytes in asset_data["tokenizer_files"].items():
	                target_path = tok_tmp / filename
	                target_path.parent.mkdir(parents=True, exist_ok=True)
	                target_path.write_bytes(filebytes)
	            tokenizer = MarianTokenizer.from_pretrained(str(tok_tmp))
	        finally:
	            shutil.rmtree(tok_tmp, ignore_errors=True)

	        config = MarianConfig.from_dict(asset_data["config"])
	        model = MarianMTModel(config)
	        model.load_state_dict(asset_data["state_dict"])

	        expert._llama = {
	            "tokenizer": tokenizer,
	            "model": model,
	            "local_dir": self._hf_translation_dir,
	        }
	        return expert._llama

	    raise RuntimeError("Translation model data not found or corrupted in bundle assets.")

	def _translate_with_internal_model(self, text: str) -> str:
	backend = self._load_translation_backend()
	tokenizer = backend["tokenizer"]
	model = backend["model"]

	inputs = tokenizer(text, return_tensors="pt", truncation=True)

	inputs = {k: v.to(model.device) for k, v in inputs.items()}

	with torch.no_grad():
	generated = model.generate(**inputs, max_new_tokens=128, renormalize_logits=True, repetition_penalty=1.1,)

	return tokenizer.batch_decode(generated, skip_special_tokens=True)[0].strip()

	def translate_zh_en(self, text: str, template: Optional[Union[str, Callable]] = None) -> str:
	    """Translate ``text`` to English when ``CHINESE_RE`` finds a match in it.

	    Falsy input yields ``""``; text without a match is returned unchanged.
	    ``template`` is accepted but ignored.
	    """
	    _ = template  # reserved for future parity with the other experts
	    if text and CHINESE_RE.search(text):
	        return self._translate_with_internal_model(text)
	    return text if text else ""

	def _translate_chinese_spans(self, text: str) -> str:
	    """Replace every ``CHINESE_SPAN_RE`` match in ``text`` with its translation.

	    Best-effort: a span whose translation raises, or comes back empty, is
	    replaced by the stripped span itself. Text without any ``CHINESE_RE``
	    match is returned untouched.
	    """
	    if CHINESE_RE.search(text) is None:
	        return text

	    def _swap(found: re.Match) -> str:
	        span = found.group(0).strip()
	        if span == "":
	            return span
	        try:
	            english = self.translate_zh_en(span)
	        except Exception:
	            # Translation failures must not break the surrounding text.
	            return span
	        return english or span

	    return CHINESE_SPAN_RE.sub(_swap, text)

	@staticmethod
	def _is_url(value: str) -> bool:
	try:
	return urlparse(value).scheme in ("http", "https")
	except Exception:
	return False

	@staticmethod
	def _image_to_data_uri(image_path: str, max_pixels: int = 1_000_000) -> str:
	    """Encode a local image file as a ``data:image/png;base64,...`` URI.

	    Images whose mode is neither RGB nor RGBA are converted to RGB. Images
	    with more than ``max_pixels`` pixels are scaled down (aspect ratio kept)
	    with LANCZOS resampling before being re-encoded as PNG.

	    Args:
	        image_path: Path of the image file to open.
	        max_pixels: Upper bound on ``width * height`` after scaling; must be >= 1.

	    Raises:
	        RuntimeError: if Pillow is not available.
	        ValueError: if ``max_pixels`` is smaller than 1.
	    """
	    if not _PIL_AVAILABLE:
	        raise RuntimeError("Pillow is required for local image encoding.")
	    if max_pixels < 1:
	        # A zero/negative budget would give a zero or complex scale factor below.
	        raise ValueError("max_pixels must be a positive integer.")
	    with Image.open(image_path) as img:
	        if img.mode not in ("RGB", "RGBA"):
	            img = img.convert("RGB")
	        width, height = img.size
	        if width * height > max_pixels:
	            scale = (max_pixels / (width * height)) ** 0.5
	            # Clamp each side to at least 1px: for very elongated images the
	            # short side would otherwise truncate to 0, which resize() rejects.
	            new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
	            img = img.resize(new_size, Image.Resampling.LANCZOS)
	        buffer = io.BytesIO()
	        img.save(buffer, format="PNG")
	    b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
	    return f"data:image/png;base64,{b64}"

	@staticmethod
	def _build_messages(
	template: Optional[Union[str, list, Callable]],
	prompt: str,
	default_builder: Callable[..., List[Dict[str, Any]]],
	**fields: Any,
	) -> List[Dict[str, Any]]:
	if template is None:
	return default_builder(prompt, **fields)
	if callable(template) and not isinstance(template, (str, list)):
	return template(prompt, **fields)
	if isinstance(template, list):
	return template
	if isinstance(template, str):
	base = default_builder(prompt, **fields)
	content = template.format(prompt=prompt, **fields)
	if base and base[0].get("role") == "system":
	return [base[0], {"role": "user", "content": content}]
	return [{"role": "user", "content": content}]
	raise TypeError("template must be None, a list[dict], a callable, or a str")

	def _exec_chat(
	    self,
	    expert_name: str,
	    messages: List[Dict[str, Any]],
	    max_tokens: int,
	    temperature: float,
	    **gen_kwargs,
	):
	    """Run one chat completion on the named expert.

	    Returns:
	        ``(text, completion_tokens, seconds, telemetry_deltas)``, where the
	        deltas come from ``calculate_delta`` applied to telemetry captured
	        immediately before and after the call.
	    """
	    handle = self[expert_name]
	    model = self._get_llama(handle)

	    before = capture_telemetry()
	    started = time.perf_counter()
	    completion = model.create_chat_completion(
	        messages=messages,
	        max_tokens=max_tokens,
	        temperature=temperature,
	        **gen_kwargs,
	    )
	    elapsed = time.perf_counter() - started
	    after = capture_telemetry()

	    reply = completion["choices"][0]["message"]["content"]
	    usage = completion.get("usage", {})
	    n_tokens = usage.get("completion_tokens", 0)
	    return reply, n_tokens, elapsed, calculate_delta(before, after)

	def _exec_completion(
	    self,
	    expert_name: str,
	    raw_prompt: str,
	    max_tokens: int,
	    temperature: float,
	    stop: Optional[List[str]] = None,
	    **gen_kwargs,
	):
	    """Run one raw-prompt completion on the named expert.

	    Returns:
	        ``(text, completion_tokens, seconds, telemetry_deltas)``, where the
	        deltas come from ``calculate_delta`` applied to telemetry captured
	        immediately before and after the call.
	    """
	    handle = self[expert_name]
	    model = self._get_llama(handle)

	    before = capture_telemetry()
	    started = time.perf_counter()
	    completion = model.create_completion(
	        prompt=raw_prompt,
	        max_tokens=max_tokens,
	        temperature=temperature,
	        stop=stop,
	        **gen_kwargs,
	    )
	    elapsed = time.perf_counter() - started
	    after = capture_telemetry()

	    reply = completion["choices"][0]["text"]
	    usage = completion.get("usage", {})
	    n_tokens = usage.get("completion_tokens", 0)
	    return reply, n_tokens, elapsed, calculate_delta(before, after)

	def _finalize(self, expert_name: str, prompt_repr: str, final_text: str, duration_sec: float, tokens: int, telemetry_deltas: Dict[str, Any], template_used: Optional[str]) -> str:
	expert = self[expert_name]
	expert.record_run(prompt_repr=prompt_repr, response=final_text, duration_sec=duration_sec, tokens=tokens, telemetry_deltas=telemetry_deltas, template_used=template_used)
	self.last_expert = expert_name
	return final_text

	def _default_creative_messages(self, prompt: str, tone: Optional[str] = None, length: Optional[str] = None, pov: Optional[str] = None, style: Optional[str] = None) -> List[Dict[str, Any]]:
	system = "You are a creative writing assistant."
	if tone:
	system += f" Match this tone: {tone}."
	constraints = [c for c in (f"length: {length}" if length else None, f"POV: {pov}" if pov else None, f"style: {style}" if style else None) if c]
	user = prompt + ("\n\nConstraints: " + ", ".join(constraints) if constraints else "")
	return [{"role": "system", "content": system}, {"role": "user", "content": user}]

	def creative_expert(
	    self,
	    prompt: str,
	    tone: Optional[str] = None,
	    length: Optional[str] = None,
	    pov: Optional[str] = None,
	    style: Optional[str] = None,
	    template: Optional[Union[str, list, Callable]] = None,
	    max_tokens: int = 600,
	    temperature: float = 0.9,
	    **gen_kwargs,
	) -> str:
	    """Run the CreativeExpert on ``prompt`` and return its recorded reply.

	    ``template`` overrides the default message builder (see
	    ``_build_messages``); extra keyword arguments go to the chat call.
	    """
	    expert = "CreativeExpert"
	    template_label = "default" if template is None else "custom"
	    messages = self._build_messages(
	        template, prompt, self._default_creative_messages,
	        tone=tone, length=length, pov=pov, style=style,
	    )
	    reply, n_tokens, elapsed, deltas = self._exec_chat(expert, messages, max_tokens, temperature, **gen_kwargs)
	    return self._finalize(expert, prompt, reply, elapsed, n_tokens, deltas, template_label)

	def _default_code_messages(self, prompt: str, language: Optional[str] = None, context: Optional[str] = None, constraints: Optional[str] = None) -> List[Dict[str, Any]]:
	parts = [f"Task: {prompt}"]
	if language:
	parts.append(f"Language: {language}")
	if context:
	parts.append(f"Context:\n{context}")
	if constraints:
	parts.append(f"Constraints:\n{constraints}")
	return [{"role": "user", "content": "\n".join(parts)}]

	def code_expert(
	    self,
	    prompt: str,
	    language: Optional[str] = None,
	    context: Optional[str] = None,
	    constraints: Optional[str] = None,
	    template: Optional[Union[str, list, Callable]] = None,
	    max_tokens: int = 1200,
	    temperature: float = 0.2,
	    **gen_kwargs,
	) -> str:
	    """Run the CodeExpert on ``prompt`` and return its recorded reply.

	    ``template`` overrides the default message builder (see
	    ``_build_messages``); extra keyword arguments go to the chat call.
	    """
	    expert = "CodeExpert"
	    template_label = "default" if template is None else "custom"
	    messages = self._build_messages(
	        template, prompt, self._default_code_messages,
	        language=language, context=context, constraints=constraints,
	    )
	    reply, n_tokens, elapsed, deltas = self._exec_chat(expert, messages, max_tokens, temperature, **gen_kwargs)
	    return self._finalize(expert, prompt, reply, elapsed, n_tokens, deltas, template_label)

	def _logic_default_raw(self, prompt: str, mode: str):
	instruction = prompt.strip()
	if mode == "deep_then_answer":
	instruction += "\nPlease reason step by step, then provide the final answer succinctly."
	raw = f"{self._R1_USER_TOKEN}{instruction}{self._R1_ASSISTANT_TOKEN}"
	return raw, None, lambda t: t.strip()
	if mode == "think_only":
	instruction += "\nPlease reason step by step, and do not provide a final answer."
	raw = f"{self._R1_USER_TOKEN}{instruction}{self._R1_ASSISTANT_TOKEN}<think>\n"
	return raw, ["</think>"], lambda t: "<think>\n" + t.strip() + "\n</think>"
	if mode == "skip_reasoning":
	raw = f"{self._R1_USER_TOKEN}{prompt.strip()}{self._R1_ASSISTANT_TOKEN}<think>\n\n</think>\n\n"
	return raw, None, lambda t: t.strip()
	raise ValueError(f"Unknown LogicExpert mode: {mode!r}")

	def logic_expert(
	    self,
	    prompt: str,
	    mode: str = "deep_then_answer",
	    template: Optional[Union[str, Callable]] = None,
	    max_tokens: int = 1024,
	    temperature: float = 0.6,
	    **gen_kwargs,
	) -> str:
	    """Run the LogicExpert on ``prompt`` in the given reasoning ``mode``.

	    Args:
	        prompt: The question to reason about.
	        mode: ``"deep_then_answer"``, ``"skip_reasoning"`` or ``"think_only"``.
	        template: Optional raw-prompt override: a callable
	            ``template(prompt, mode)`` or a format string using ``{prompt}``
	            and ``{mode}``. With a template the completion text is returned
	            without post-processing.
	        max_tokens: Completion token limit.
	        temperature: Sampling temperature.
	        **gen_kwargs: Forwarded to the completion call. A ``stop`` entry is
	            used as the stop list for custom templates and is added to the
	            mode's own stop sequences for the default prompt.

	    Raises:
	        ValueError: if ``mode`` is not a known mode.
	    """
	    if mode not in ("deep_then_answer", "skip_reasoning", "think_only"):
	        raise ValueError("mode must be one of: deep_then_answer, skip_reasoning, think_only")

	    # ``stop`` is passed explicitly to _exec_completion below, so it must be
	    # removed from gen_kwargs in every branch; otherwise the keyword would be
	    # supplied twice and the call would raise TypeError.
	    user_stop = gen_kwargs.pop("stop", None)

	    if template is not None:
	        stop = user_stop
	        raw_prompt = template(prompt, mode) if callable(template) else str(template).format(prompt=prompt, mode=mode)
	        wrap = lambda t: t
	    else:
	        raw_prompt, stop, wrap = self._logic_default_raw(prompt, mode)
	        if user_stop:
	            # Keep the mode's structural stops (e.g. "</think>") and append the caller's.
	            extra = [user_stop] if isinstance(user_stop, str) else list(user_stop)
	            base = list(stop or [])
	            stop = base + [s for s in extra if s not in base]

	    raw_text, tokens, dt, deltas = self._exec_completion("LogicExpert", raw_prompt, max_tokens, temperature, stop=stop, **gen_kwargs)
	    final_text = wrap(raw_text)
	    return self._finalize("LogicExpert", prompt, final_text, dt, tokens, deltas, "custom" if template is not None else mode)

	def _default_role_messages(self, prompt: str, character_card: Optional[str] = None) -> List[Dict[str, Any]]:
	system = character_card or "You are roleplaying a character."
	system += " Respond using Classic Internet RP formatting: action speech narration."
	return [{"role": "system", "content": system}, {"role": "user", "content": prompt}]

	def role_expert(
	    self,
	    prompt: str,
	    character_card: Optional[str] = None,
	    template: Optional[Union[str, list, Callable]] = None,
	    max_tokens: int = 400,
	    temperature: float = 0.9,
	    **gen_kwargs,
	) -> str:
	    """Run the RoleExpert on ``prompt`` and return its recorded reply.

	    ``template`` overrides the default message builder (see
	    ``_build_messages``); extra keyword arguments go to the chat call.
	    """
	    expert = "RoleExpert"
	    template_label = "default" if template is None else "custom"
	    messages = self._build_messages(template, prompt, self._default_role_messages, character_card=character_card)
	    reply, n_tokens, elapsed, deltas = self._exec_chat(expert, messages, max_tokens, temperature, **gen_kwargs)
	    return self._finalize(expert, prompt, reply, elapsed, n_tokens, deltas, template_label)

	def _default_affect_messages(self, text: str) -> List[Dict[str, Any]]:
	system = "You are a compact classifier. Output only valid JSON."
	user = f'Classify the emotional tone of this text:\n{text}\nReturn: {{"emotion": "...", "confidence": 0-1, "evidence": "..."}}'
	return [{"role": "system", "content": system}, {"role": "user", "content": user}]

	def affect_expert(
	    self,
	    text: str,
	    template: Optional[Union[str, list, Callable]] = None,
	    max_tokens: int = 300,
	    temperature: float = 0.3,
	    **gen_kwargs,
	) -> str:
	    """Run the AffectExpert on ``text`` and return its recorded reply.

	    ``template`` overrides the default message builder (see
	    ``_build_messages``); extra keyword arguments go to the chat call.
	    """
	    expert = "AffectExpert"
	    template_label = "default" if template is None else "custom"
	    messages = self._build_messages(template, text, self._default_affect_messages)
	    reply, n_tokens, elapsed, deltas = self._exec_chat(expert, messages, max_tokens, temperature, **gen_kwargs)
	    return self._finalize(expert, text, reply, elapsed, n_tokens, deltas, template_label)

	def _default_vision_messages(self, prompt: str, image: Optional[str] = None) -> List[Dict[str, Any]]:
	if not image:
	return [{"role": "user", "content": prompt}]
	data_uri = image if self._is_url(image) else self._image_to_data_uri(image)
	content = [
	{"type": "image_url", "image_url": {"url": data_uri}},
	{"type": "text", "text": prompt},
	]
	return [{"role": "user", "content": content}]

	def _run_multimodal(self, expert_name: str, prompt: str, image: Optional[str], template: Optional[Union[str, list, Callable]], max_tokens: int, temperature: float, **gen_kwargs) -> str:
	messages = self._build_messages(template, prompt, self._default_vision_messages, image=image)
	text, tokens, dt, deltas = self._exec_chat(expert_name, messages, max_tokens, temperature, **gen_kwargs)
	return self._finalize(expert_name, prompt, text, dt, tokens, deltas, "custom" if template is not None else "default")

	def vision_expert(
	    self,
	    prompt: str,
	    image: Optional[str] = None,
	    template: Optional[Union[str, list, Callable]] = None,
	    max_tokens: int = 512,
	    temperature: float = 0.4,
	    **gen_kwargs,
	) -> str:
	    """Run the VisionExpert on ``prompt`` with an optional ``image`` (path or URL)."""
	    reply = self._run_multimodal("VisionExpert", prompt, image, template, max_tokens, temperature, **gen_kwargs)
	    return reply

	def head_expert(
	    self,
	    prompt: str,
	    image: Optional[str] = None,
	    template: Optional[Union[str, list, Callable]] = None,
	    max_tokens: int = 512,
	    temperature: float = 0.4,
	    **gen_kwargs,
	) -> str:
	    """Run the HeadExpert on ``prompt`` with an optional ``image`` (path or URL)."""
	    reply = self._run_multimodal("HeadExpert", prompt, image, template, max_tokens, temperature, **gen_kwargs)
	    return reply

	def _default_math_messages(self, prompt: str) -> List[Dict[str, Any]]:
	system = "You are a precise math and reasoning assistant."
	user = f"Solve the following. Show formulas, compute carefully, and state the final answer clearly.\n\n{prompt}"
	return [{"role": "system", "content": system}, {"role": "user", "content": user}]

	def math_expert(
	    self,
	    prompt: str,
	    template: Optional[Union[str, list, Callable]] = None,
	    max_tokens: int = 500,
	    temperature: float = 0.2,
	    **gen_kwargs,
	) -> str:
	    """Run the MathExpert on ``prompt`` and return its cleaned, recorded reply.

	    Post-processing: spans matched as Chinese are translated via
	    ``_translate_chinese_spans``, then ``collapse_repeated_lines`` is applied
	    with ``max_repeat=1``.

	    Args:
	        prompt: The problem statement.
	        template: Optional override for the default message builder.
	        max_tokens: Completion token limit.
	        temperature: Sampling temperature.
	        **gen_kwargs: Forwarded to the chat call. ``repeat_penalty`` defaults
	            to 1.15 and may be overridden by the caller.
	    """
	    messages = self._build_messages(template, prompt, self._default_math_messages)
	    # Apply the penalty as a default instead of a fixed keyword: passing it
	    # both explicitly and inside **gen_kwargs would raise TypeError whenever
	    # a caller supplied their own value.
	    gen_kwargs.setdefault("repeat_penalty", 1.15)
	    text, tokens, dt, deltas = self._exec_chat("MathExpert", messages, max_tokens, temperature, **gen_kwargs)
	    if CHINESE_RE.search(text):
	        text = self._translate_chinese_spans(text)
	    text = collapse_repeated_lines(text, max_repeat=1)
	    return self._finalize("MathExpert", prompt, text, dt, tokens, deltas, "custom" if template is not None else "default")

	def _default_tool_prompt(self, task_instruction: str, tools_json: str, format_instruction: str, query: str) -> str:
	return (
	"You are an AI assistant for function calling. For politically sensitive questions, "
	"security and privacy issues, and other non-computer science questions, you will refuse "
	"to answer\n"
	"### Instruction:\n"
	f"[BEGIN OF TASK INSTRUCTION]\n{task_instruction}\n[END OF TASK INSTRUCTION]\n\n"
	f"[BEGIN OF AVAILABLE TOOLS]\n{tools_json}\n[END OF AVAILABLE TOOLS]\n\n"
	f"[BEGIN OF FORMAT INSTRUCTION]\n{format_instruction}\n[END OF FORMAT INSTRUCTION]\n\n"
	f"[BEGIN OF QUERY]\n{query}\n[END OF QUERY]\n\n"
	"### Response:\n"
	)

	@staticmethod
	def _safe_parse_tool_json(text: str) -> Optional[Dict[str, Any]]:
	text = text.strip()
	try:
	return json.loads(text)
	except json.JSONDecodeError:
	start, end = text.find("{"), text.rfind("}")
	if start != -1 and end != -1 and end > start:
	try:
	return json.loads(text[start:end + 1])
	except json.JSONDecodeError:
	return None
	return None

	def tool_expert(
	    self,
	    query: str,
	    tools: Optional[List[Dict[str, Any]]] = None,
	    task_instruction: Optional[str] = None,
	    format_instruction: Optional[str] = None,
	    template: Optional[Union[str, Callable]] = None,
	    max_tokens: int = 512,
	    temperature: float = 0.2,
	    **gen_kwargs,
	) -> str:
	    """Run the ToolExpert on ``query`` and return its (pretty-printed) tool call.

	    Args:
	        query: The user request.
	        tools: Tool descriptions; serialised with ``json.dumps(indent=2)``.
	        task_instruction: Overrides ``_XLAM_TASK_INSTRUCTION_DEFAULT``.
	        format_instruction: Overrides ``_XLAM_FORMAT_INSTRUCTION_DEFAULT``.
	        template: Optional raw-prompt override: a callable
	            ``template(query, tools, task_instruction, format_instruction)``
	            or a format string using ``{query}``, ``{tools}``,
	            ``{task_instruction}`` and ``{format_instruction}``.
	        max_tokens: Completion token limit.
	        temperature: Sampling temperature.
	        **gen_kwargs: Forwarded to the completion call. A ``stop`` entry
	            replaces the default stop list for custom templates and is added
	            to it for the default prompt.

	    Returns:
	        The parsed reply re-serialised as indented JSON, or the stripped raw
	        text when no JSON could be extracted.
	    """
	    tools = tools or []
	    task_instruction = task_instruction or self._XLAM_TASK_INSTRUCTION_DEFAULT
	    format_instruction = format_instruction or self._XLAM_FORMAT_INSTRUCTION_DEFAULT
	    tools_json = json.dumps(tools, indent=2)

	    default_stop = ["### Instruction:"]
	    # ``stop`` is passed explicitly to _exec_completion below, so it must be
	    # removed from gen_kwargs in every branch; otherwise the keyword would be
	    # supplied twice and the call would raise TypeError.
	    unset = object()
	    user_stop = gen_kwargs.pop("stop", unset)

	    if template is not None:
	        stop = default_stop if user_stop is unset else user_stop
	        raw_prompt = template(query, tools, task_instruction, format_instruction) if callable(template) else str(template).format(query=query, tools=tools_json, task_instruction=task_instruction, format_instruction=format_instruction)
	    else:
	        stop = default_stop
	        if user_stop is not unset and user_stop:
	            # Keep the guard that belongs to the default prompt and append the caller's stops.
	            extra = [user_stop] if isinstance(user_stop, str) else list(user_stop)
	            stop = default_stop + [s for s in extra if s not in default_stop]
	        raw_prompt = self._default_tool_prompt(task_instruction, tools_json, format_instruction, query)

	    raw_text, tokens, dt, deltas = self._exec_completion("ToolExpert", raw_prompt, max_tokens, temperature, stop=stop, **gen_kwargs)
	    parsed = self._safe_parse_tool_json(raw_text)
	    final_text = json.dumps(parsed, indent=2) if parsed is not None else raw_text.strip()
	    return self._finalize("ToolExpert", query, final_text, dt, tokens, deltas, "custom" if template is not None else "default")

	def translation_expert(self, text: str) -> str:
	    """Expert-style entry point for translation; delegates to ``translate_zh_en``."""
	    translated = self.translate_zh_en(text)
	    return translated

	def save_checkpoint(self, path: str = DEFAULT_CHECKPOINT_PATH) -> None:
	    """Serialise this whole object to ``path`` with ``torch.save``.

	    ``unload_all()`` is called first, before the object is pickled.
	    """
	    self.unload_all()
	    target = path
	    torch.save(self, target)

	@classmethod
	def load_checkpoint(cls, path: str = DEFAULT_CHECKPOINT_PATH) -> "PackedLM":
	    """Restore an object saved by ``save_checkpoint`` onto the CPU.

	    NOTE: ``weights_only=False`` makes ``torch.load`` unpickle arbitrary
	    Python objects, so only load checkpoints from a trusted source.
	    """
	    restored = torch.load(path, map_location="cpu", weights_only=False)
	    return restored

	def run_self_test(self, image_test_source: Optional[str] = None) -> Dict[str, Any]:
	    """Send one canned prompt to every registered expert that has one.

	    Args:
	        image_test_source: Optional image path handed to the Head/Vision
	            experts; it is used only if the path exists on disk.

	    Returns:
	        ``{"per_expert_responses": ..., "summary": self.summary()}``. A
	        failing expert contributes an ``"[ERROR] Type: message"`` string
	        instead of aborting the run; LogicExpert contributes one reply per mode.
	    """
	    prompts = {
	        "CreativeExpert": "Write a short fantasy story (150 words max) about a dragon that discovers a computer hidden beneath a mountain.",
	        "CodeExpert": "Write a Python implementation of quicksort. Complete runnable function, include comments, briefly explain time complexity.",
	        "LogicExpert": "All robots can compute. Some computers are robots. No calculators are robots. What conclusions can be logically inferred?",
	        "RoleExpert": "Explain how transformer attention works while staying fully in character.",
	        "HeadExpert": "Explain how Mixture-of-Experts (MoE) routing works. Focus on: expert selection, gating, token routing, and efficiency benefits.",
	        "MathExpert": "A train travels 120 miles in 2 hours and then 180 miles in 3 hours. What was its average speed for the entire trip?",
	        "ToolExpert": "What is the weather in St. Louis right now?",
	        "AffectExpert": "Your core navigation router is completely dropping telemetry packets! Fix this or we pull our implementation down tonight!",
	        "VisionExpert": "Describe the visual elements, layout, and any text in this image.",
	        "TranslationExpert": "你好，世界。这个模型应该把中文翻译成英文。",
	    }
	    logic_modes = ("deep_then_answer", "skip_reasoning", "think_only")

	    responses: Dict[str, Any] = {}
	    for name in self.keys():
	        if name not in prompts:
	            continue
	        prompt = prompts[name]
	        try:
	            if name == "LogicExpert":
	                outcome = {mode: self.logic_expert(prompt, mode=mode) for mode in logic_modes}
	            elif name in ("HeadExpert", "VisionExpert"):
	                # Only hand over an image that is actually present on disk.
	                usable_image = None
	                if image_test_source and os.path.exists(image_test_source):
	                    usable_image = image_test_source
	                runner = self.head_expert if name == "HeadExpert" else self.vision_expert
	                outcome = runner(prompt, image=usable_image)
	            elif name == "ToolExpert":
	                demo_tools = [
	                    {"name": "get_weather", "description": "Get current weather for a city", "parameters": {"city": "string"}},
	                    {"name": "send_discord_message", "description": "Send a message to a Discord webhook", "parameters": {"webhook_url": "string", "content": "string"}},
	                ]
	                outcome = self.tool_expert(prompt, tools=demo_tools)
	            elif name == "TranslationExpert":
	                outcome = self.translation_expert(prompt)
	            else:
	                text_only = {
	                    "CreativeExpert": self.creative_expert,
	                    "CodeExpert": self.code_expert,
	                    "RoleExpert": self.role_expert,
	                    "AffectExpert": self.affect_expert,
	                    "MathExpert": self.math_expert,
	                }
	                outcome = text_only[name](prompt)
	            responses[name] = outcome
	        except Exception as e:
	            responses[name] = f"[ERROR] {type(e).__name__}: {e}"

	    return {"per_expert_responses": responses, "summary": self.summary()}


	# Names exported by ``from <module> import *``.
	# NOTE(review): PackedLMCheckpointRuntime, load_packedlm and PackedLLM are
	# defined later in this file but are not listed here — confirm that is intended.
	__all__ = ["PackedLM", "ExpertHandle", "HardwareProbe", "capture_telemetry", "calculate_delta"]


	class PackedLMCheckpointRuntime:
	    """Load and operate a serialized PackedLM checkpoint.

	    The wrapper loads the checkpoint on construction and forwards expert
	    calls to the loaded object. Attributes that are not defined on the
	    wrapper itself are delegated to the loaded object.

	    Parameters
	    ----------
	    checkpoint_path:
	        Path to the saved `LM.pt` checkpoint.
	    packedlm_module:
	        Optional module name to import before loading. Use this when the
	        `PackedLM` class lives in a separate Python module.
	        Examples: "packedlm", "my_project.packedlm", or None if the class
	        is already imported in the current process.
	    map_location:
	        Passed to `torch.load`. Usually "cpu".
	    weights_only:
	        Must be False for a full object checkpoint saved with `torch.save(obj, ...)`.
	    strict_type_check:
	        If True, verifies that the loaded object looks like a PackedLM instance.
	    """

	    def __init__(
	        self,
	        checkpoint_path: Union[str, Path] = "LM.pt",
	        packedlm_module: Optional[str] = None,
	        map_location: str = "cpu",
	        weights_only: bool = False,
	        strict_type_check: bool = True,
	    ):
	        self.checkpoint_path = Path(checkpoint_path)
	        self.packedlm_module = packedlm_module
	        self.map_location = map_location
	        self.weights_only = weights_only
	        self.strict_type_check = strict_type_check

	        self.model: Any = None
	        self.load()

	    def _import_checkpoint_module(self) -> None:
	        """Import ``packedlm_module`` (if set) so its classes can be unpickled."""
	        if not self.packedlm_module:
	            return
	        importlib.import_module(self.packedlm_module)

	    def _validate_model(self) -> None:
	        """Check that ``self.model`` exposes the expected PackedLM methods.

	        Raises:
	            RuntimeError: if nothing is loaded.
	            TypeError: if attributes are missing and ``strict_type_check`` is on.
	        """
	        if self.model is None:
	            raise RuntimeError("PackedLM checkpoint is not loaded.")

	        required_attrs = [
	            "creative_expert",
	            "code_expert",
	            "logic_expert",
	            "role_expert",
	            "affect_expert",
	            "head_expert",
	            "vision_expert",
	            "math_expert",
	            "tool_expert",
	            "translation_expert",
	            "summary",
	            "run_self_test",
	        ]
	        missing = [name for name in required_attrs if not hasattr(self.model, name)]
	        if missing and self.strict_type_check:
	            raise TypeError(
	                "Loaded object does not look like a PackedLM instance. Missing: "
	                + ", ".join(missing)
	            )

	    def load(self) -> Any:
	        """Load the checkpoint from disk, validate it and return the loaded object.

	        Raises:
	            FileNotFoundError: if ``checkpoint_path`` does not exist.
	            TypeError: if strict validation rejects the loaded object; in that
	                case ``self.model`` is reset to ``None``.
	        """
	        if not self.checkpoint_path.exists():
	            raise FileNotFoundError(f"Checkpoint not found: {self.checkpoint_path}")

	        self._import_checkpoint_module()

	        # The checkpoint is a full serialized object, so weights_only must be False.
	        # SECURITY: with weights_only=False torch.load unpickles arbitrary Python
	        # objects, so only load checkpoints that come from a trusted source.
	        self.model = torch.load(
	            self.checkpoint_path,
	            map_location=self.map_location,
	            weights_only=self.weights_only,
	        )
	        try:
	            self._validate_model()
	        except Exception:
	            # Do not keep delegating to an object that failed validation
	            # (relevant for reload(), where this instance stays in use).
	            self.model = None
	            raise
	        return self.model

	    def reload(self) -> Any:
	        """Drop the current object and load the checkpoint again."""
	        self.model = None
	        return self.load()

	    def __getattr__(self, name: str) -> Any:
	        # Delegate unknown attributes to the loaded PackedLM object.
	        # The wrapper's own fields are excluded so that a lookup made before
	        # they exist raises AttributeError instead of recursing through here.
	        if name in {"model", "checkpoint_path", "packedlm_module", "map_location", "weights_only", "strict_type_check"}:
	            return super().__getattribute__(name)
	        if self.model is not None and hasattr(self.model, name):
	            return getattr(self.model, name)
	        raise AttributeError(name)

	    def __getitem__(self, key: str) -> Any:
	        return self.model[key]

	    def __contains__(self, key: str) -> bool:
	        return key in self.model

	    @property
	    def last_expert(self) -> str:
	        """Name stored in the loaded object's ``last_expert``, or ``""`` if absent."""
	        return getattr(self.model, "last_expert", "")

	    def experts(self) -> List[str]:
	        """Return the keys of the loaded object as a list."""
	        return list(self.model.keys())

	    def summary(self) -> Dict[str, Any]:
	        """Return ``summary()`` of the loaded object."""
	        return self.model.summary()

	    def creative(
	        self,
	        prompt: str,
	        **kwargs: Any,
	    ) -> str:
	        """Forward to ``creative_expert``."""
	        return self.model.creative_expert(prompt, **kwargs)

	    def code(
	        self,
	        prompt: str,
	        **kwargs: Any,
	    ) -> str:
	        """Forward to ``code_expert``."""
	        return self.model.code_expert(prompt, **kwargs)

	    def logic(
	        self,
	        prompt: str,
	        mode: str = "deep_then_answer",
	        **kwargs: Any,
	    ) -> str:
	        """Forward to ``logic_expert`` with the given ``mode``."""
	        return self.model.logic_expert(prompt, mode=mode, **kwargs)

	    def role(
	        self,
	        prompt: str,
	        **kwargs: Any,
	    ) -> str:
	        """Forward to ``role_expert``."""
	        return self.model.role_expert(prompt, **kwargs)

	    def affect(
	        self,
	        text: str,
	        **kwargs: Any,
	    ) -> str:
	        """Forward to ``affect_expert``."""
	        return self.model.affect_expert(text, **kwargs)

	    def head(
	        self,
	        prompt: str,
	        image: Optional[str] = None,
	        **kwargs: Any,
	    ) -> str:
	        """Forward to ``head_expert`` with an optional image."""
	        return self.model.head_expert(prompt, image=image, **kwargs)

	    def vision(
	        self,
	        prompt: str,
	        image: Optional[str] = None,
	        **kwargs: Any,
	    ) -> str:
	        """Forward to ``vision_expert`` with an optional image."""
	        return self.model.vision_expert(prompt, image=image, **kwargs)

	    def math(
	        self,
	        prompt: str,
	        **kwargs: Any,
	    ) -> str:
	        """Forward to ``math_expert`` and pass the reply through ``collapse_repeated_lines``."""
	        text = self.model.math_expert(prompt, **kwargs)
	        text = collapse_repeated_lines(text)
	        return text

	    def tool(
	        self,
	        query: str,
	        tools: Optional[List[Dict[str, Any]]] = None,
	        **kwargs: Any,
	    ) -> str:
	        """Forward to ``tool_expert``."""
	        return self.model.tool_expert(query, tools=tools, **kwargs)

	    def translation(self, text: str) -> str:
	        """Forward to ``translation_expert``."""
	        return self.model.translation_expert(text)

	    def reload_expert(self, expert_name: str) -> Any:
	        return self.model.reload_expert(expert_name)

	    def unload_expert(self, expert_name: str) -> None:
	        return self.model.unload_expert(expert_name)

	    def unload_all(self) -> None:
	        return self.model.unload_all()

	    def save_checkpoint(self, path: Union[str, Path] = "LM.pt") -> None:
	        """Ask the loaded object to save itself to ``path`` (passed on as ``str``)."""
	        return self.model.save_checkpoint(str(path))

	    def run_self_test(self, image_test_source: Optional[str] = None) -> Dict[str, Any]:
	        """Forward to the loaded object's ``run_self_test``."""
	        return self.model.run_self_test(image_test_source=image_test_source)

	    def __repr__(self) -> str:
	        model_name = type(self.model).__name__ if self.model is not None else "<unloaded>"
	        return f"PackedLMCheckpointRuntime(checkpoint_path={self.checkpoint_path!s}, model={model_name})"

	def load_packedlm(
	    checkpoint_path: Union[str, Path] = "LM.pt",
	    packedlm_module: Optional[str] = None,
	    map_location: str = "cpu",
	    weights_only: bool = False,
	    strict_type_check: bool = True,
	) -> PackedLMCheckpointRuntime:
	    """Load a PackedLM checkpoint into a runtime wrapper.

	    Every argument is handed unchanged to ``PackedLMCheckpointRuntime``.
	    """
	    runtime_options = {
	        "checkpoint_path": checkpoint_path,
	        "packedlm_module": packedlm_module,
	        "map_location": map_location,
	        "weights_only": weights_only,
	        "strict_type_check": strict_type_check,
	    }
	    return PackedLMCheckpointRuntime(**runtime_options)

	@dataclass
	class RouteStep:
	    """One entry of an ``ExecutionContext.route`` list.

	    NOTE(review): the field notes below are inferred from the field names;
	    the code that builds and consumes route steps is not visible here — confirm.
	    """

	    # presumably the name of the expert that should handle this step
	    expert: str
	    # presumably the prompt fragment handed to that expert
	    sub_prompt: str
	    # presumably what this step is meant to achieve
	    goal: str
	    # presumably extra keyword arguments for the expert call; each instance
	    # gets its own fresh dict
	    kwargs: Dict[str, Any] = field(default_factory=dict)


	@dataclass
	class ExecutionContext:
	    """Mutable state bag for one request: only ``prompt`` is required.

	    Every container field defaults to a fresh empty dict/list per instance.
	    NOTE(review): the field notes below are inferred from the field names;
	    the pipeline that fills them is not visible here — confirm.
	    """

	    # the incoming request text
	    prompt: str
	    # presumably an image path or URL accompanying the prompt
	    image: Optional[str] = None
	    # presumably tool descriptions available to the request
	    tools: Optional[List[Dict[str, Any]]] = None
	    # presumably flags selecting a reasoning depth — TODO confirm semantics
	    deep_think: bool = False
	    fast_think: bool = False
	    # presumably reasoning text keyed by stage or expert — TODO confirm keys
	    think_blocks: Dict[str, str] = field(default_factory=dict)
	    response_goal: Dict[str, Any] = field(default_factory=dict)
	    # planned steps, each a RouteStep
	    route: List["RouteStep"] = field(default_factory=list)
	    step_results: List[Dict[str, Any]] = field(default_factory=list)
	    base_response: str = ""
	    affective_state: Dict[str, Any] = field(default_factory=dict)
	    final_response: str = ""
	    final_review: Dict[str, Any] = field(default_factory=dict)
	    command_context: Dict[str, Any] = field(default_factory=dict)


	class PackedLLM(nn.ModuleDict):
	# Expert keys in two groups; together they form REQUIRED_EXPERTS. The same
	# twelve keys appear in the class_map of `_build_experts`.
	MODEL_EXPERTS: List[str] = [
	"head_expert", "affect_expert", "role_expert", "creative_expert",
	"code_expert", "logic_expert", "math_expert", "vision_expert",
	"tool_expert", "translation_expert",
	]
	PIPELINE_EXPERTS: List[str] = ["action_expert", "web_expert"]
	REQUIRED_EXPERTS: List[str] = MODEL_EXPERTS + PIPELINE_EXPERTS

	# NOTE(review): presumably upper bounds for retry/attempt loops elsewhere in
	# the class; their use is not visible in this part of the file — confirm.
	MAX_STEP_RETRIES: int = 3
	MAX_ACTION_ATTEMPTS: int = 3
	MAX_WEB_ROUNDS: int = 3

	# NOTE(review): presumably written to / checked against saved checkpoints — confirm.
	_CHECKPOINT_FORMAT_VERSION: int = 3
	# Keyword settings per stage name. `_stage_kwargs(stage)` returns a copy of
	# the matching entry, or an empty dict for an unknown stage.
	_STAGE_SETTINGS: Dict[str, Dict[str, Any]] = {
	"head_plan_response_goal": {"temperature": 0.2},
	"head_build_route": {"temperature": 0.5},
	"head_retry_or_reroute": {"temperature": 0.8, "top_p": 0.9},
	"head_plan_detour": {"temperature": 1.0, "top_p": 0.95},
	"head_synthesize_base": {"temperature": 1.0, "top_p": 0.95},
	"head_review_final_response": {"temperature": 0.0},
	"head_action_review": {"temperature": 1.0, "top_p": 0.95},
	"head_web_queries": {"temperature": 0.5},
	"head_web_answer_subquery": {"temperature": 0.5},
	"head_web_review": {"temperature": 1.0, "top_p": 0.95},
	"head_web_synthesis": {"temperature": 0.8, "top_p": 0.9},
	"head_validate_response": {"temperature": 0.8, "top_p": 0.9},
	"affect_evaluate_step": {"temperature": 0.2},
	"affect_build_affective_state": {"temperature": 0.5},
	"role_apply_persona": {"temperature": 1.0, "top_p": 0.95},
	"logic_action_planning": {"temperature": 0.0},
	"code_action_generation": {"temperature": 0.0},
	"logic_action_repair": {"temperature": 0.0},
	"deep_think": {"temperature": 0.0},
	}
	def __init__(
	    self,
	    bot_id: Optional[str] = None,
	    user_id: Optional[str] = None,
	    model_dir: str = "models",
	    memory_dir: Optional[str] = None,
	    web: bool = False,
	    hardware_probe: bool = True,
	    expert_modules: Optional[Dict[str, nn.Module]] = None,
	    packedlm_checkpoint: Optional[str] = "LM.pt",
	    packedlm_module: Optional[str] = "PackedLM",
	):
	    """Store configuration, then bring up the runtime, memory and optional extras.

	    Args:
	        bot_id: Id used by ``_load_profiles`` to fetch the bot profile.
	        user_id: Id used by ``_load_profiles`` to fetch the user profile.
	        model_dir: Passed as ``model_dir`` to expert classes in ``_build_experts``.
	        memory_dir: Root for the memory bank; ``_init_memory`` falls back to
	            the project root when it is not given.
	        web: When true, ``_attach_web()`` is called.
	        hardware_probe: When true, ``_probe_hardware()`` is called.
	        expert_modules: Handed to ``_build_experts``, which runs only if no
	            checkpoint runtime could be loaded.
	        packedlm_checkpoint: Checkpoint path, resolved via ``_resolve_local_path``.
	        packedlm_module: Module name given to the checkpoint runtime to import.
	    """
	    super().__init__()

	    # Caller-supplied configuration.
	    self.bot_id, self.user_id = bot_id, user_id
	    self.model_dir, self.memory_dir = model_dir, memory_dir
	    self._hardware_probe_enabled = hardware_probe

	    self.packedlm_checkpoint = self._resolve_local_path(packedlm_checkpoint)
	    self.packedlm_module = packedlm_module

	    # Lazily populated state; everything starts empty.
	    self._memory_bank: Any = None
	    self._bot_profile: Dict[str, Any] = {}
	    self._user_profile: Dict[str, Any] = {}
	    self._hardware_state: Dict[str, Any] = {}
	    self._web: Any = None
	    self._codebox: Any = None

	    self._packedlm_runtime: Optional[Any] = None
	    self._runtime_expert_names: List[str] = []

	    # Prefer the checkpoint runtime; build experts only when it is unavailable.
	    self._init_packedlm_runtime()
	    runtime_missing = self._packedlm_runtime is None
	    if runtime_missing:
	        self._build_experts(expert_modules)

	    self._init_memory()
	    self._load_profiles()

	    if hardware_probe:
	        self._probe_hardware()

	    if web:
	        self._attach_web()

	# ------------------------------------------------------------------
	# Pickle support – live handles must never be serialised
	# ------------------------------------------------------------------

	def __getstate__(self):
	state = self.__dict__.copy()
	state["_packedlm_runtime"] = None
	state["_web"] = None
	state["_codebox"] = None
	state["_memory_bank"] = None
	return state

	def __setstate__(self, state):
	self.__dict__.update(state)
	self._runtime_expert_names = self.__dict__.get("_runtime_expert_names", [])
	self._packedlm_runtime = None
	self._web = None
	self._codebox = None
	self._memory_bank = None
	self._init_packedlm_runtime()

	# ------------------------------------------------------------------
	# Path helpers
	# ------------------------------------------------------------------

	def _resolve_local_path(self, path: Optional[str]) -> Optional[str]:
	if not path:
	return None
	p = Path(path)
	if p.is_absolute():
	return str(p)
	script_dir = Path(__file__).resolve().parent
	candidate = script_dir / p
	if candidate.exists():
	return str(candidate)
	return str(p.resolve())

	def _project_root(self) -> Path:
	try:
	return Path(__file__).resolve().parent
	except Exception:
	return Path.cwd()

	# ------------------------------------------------------------------
	# Initialisation helpers
	# ------------------------------------------------------------------

	def _init_packedlm_runtime(self) -> None:
	if not self.packedlm_checkpoint or PackedLMCheckpointRuntime is None:
	return
	if not os.path.exists(self.packedlm_checkpoint):
	return

	try:
	self._packedlm_runtime = PackedLMCheckpointRuntime(
	checkpoint_path=self.packedlm_checkpoint,
	packedlm_module=self.packedlm_module,
	map_location="cpu",
	weights_only=False,
	strict_type_check=True,
	)
	raw_names = list(self._packedlm_runtime.experts())
	self._runtime_expert_names = _expert_names_canonical(raw_names)
	except Exception as exc:
	print(f"[PackedLLM] Warning: could not load PackedLM runtime: {exc}")
	self._packedlm_runtime = None
	self._runtime_expert_names = []

	def _build_experts(self, expert_modules: Optional[Dict[str, nn.Module]]) -> None:
	if expert_modules:
	for key, module in expert_modules.items():
	self[key] = module
	return

	class_map = {
	"head_expert": ("HeadExpert", {"model_dir": self.model_dir}),
	"affect_expert": ("AffectExpert", {"model_dir": self.model_dir}),
	"role_expert": ("RoleExpert", {"model_dir": self.model_dir}),
	"creative_expert": ("CreativeExpert", {"model_dir": self.model_dir}),
	"code_expert": ("CodeExpert", {"model_dir": self.model_dir}),
	"logic_expert": ("LogicExpert", {"model_dir": self.model_dir}),
	"math_expert": ("MathExpert", {"model_dir": self.model_dir}),
	"vision_expert": ("VisionExpert", {"model_dir": self.model_dir}),
	"tool_expert": ("ToolExpert", {"model_dir": self.model_dir}),
	"translation_expert": ("TranslationExpert", {"model_dir": self.model_dir}),
	"action_expert": ("ActionExpert", {"model_dir": self.model_dir}),
	"web_expert": ("WebExpert", {"model_dir": self.model_dir}),
	}

	frame_globals: Dict[str, Any] = {}
	try:
	frame = inspect.stack()[2].frame
	frame_globals = frame.f_globals
	except Exception:
	pass

	for key, (cls_name, kwargs) in class_map.items():
	cls = frame_globals.get(cls_name) or builtins.__dict__.get(cls_name)
	if cls is not None:
	try:
	self[key] = cls(**kwargs)
	except Exception as exc:
	print(f"[PackedLLM] Warning: could not instantiate {cls_name}: {exc}")

	def _init_memory(self) -> None:
	    """Create the memory bank, or leave it as ``None`` when that is not possible.

	    The bank is rooted at ``memory_dir`` (expanded and resolved) or, without
	    one, at the project root. If the bank exposes ``gator.set_lazy`` it is
	    switched to lazy mode. Any failure prints a warning and resets the bank.
	    """
	    if MemoryBank is None:
	        return
	    try:
	        if self.memory_dir:
	            root = Path(self.memory_dir).expanduser().resolve()
	        else:
	            root = self._project_root()
	        self._memory_bank = MemoryBank(gator_location=root, build_if_missing=False)
	        bank = self._memory_bank
	        if hasattr(bank, "gator") and hasattr(bank.gator, "set_lazy"):
	            bank.gator.set_lazy(True)
	    except Exception as exc:
	        print(f"[PackedLLM] Warning: MemoryBank degraded mode: {exc}")
	        self._memory_bank = None

	def _load_profiles(self) -> None:
	if self._memory_bank is None:
	return

	if self.bot_id:
	try:
	self._bot_profile = _safe_call(self._memory_bank, "get_profile", "bot", self.bot_id, default={}) or {}
	except Exception:
	self._bot_profile = {}

	if self.user_id:
	try:
	self._user_profile = _safe_call(self._memory_bank, "get_profile", "user", self.user_id, default={}) or {}
	except Exception:
	self._user_profile = {}

	def _probe_hardware(self) -> None:
	    """Record a snapshot of host facts in ``self._hardware_state``.

	    Platform name, Python version and CPU count are always recorded. RAM
	    figures are added when ``psutil`` is not ``None``; GPU name and VRAM
	    figures are added when CUDA is available, otherwise ``"gpu"`` is set to
	    ``"none"``. Errors raised by either probe are ignored and keep whatever
	    was recorded before the error.
	    """
	    bytes_per_gb = 1e9

	    snapshot: Dict[str, Any] = {}
	    snapshot["platform"] = platform.system()
	    snapshot["python_version"] = platform.python_version()
	    snapshot["cpu_count"] = os.cpu_count()

	    if psutil is not None:
	        try:
	            memory = psutil.virtual_memory()
	            snapshot["ram_total_gb"] = round(memory.total / bytes_per_gb, 1)
	            snapshot["ram_available_gb"] = round(memory.available / bytes_per_gb, 1)
	            snapshot["ram_percent_used"] = memory.percent
	        except Exception:
	            pass

	    if not torch.cuda.is_available():
	        snapshot["gpu"] = "none"
	    else:
	        try:
	            # Only device index 0 is inspected.
	            snapshot["gpu_name"] = torch.cuda.get_device_name(0)
	            device = torch.cuda.get_device_properties(0)
	            snapshot["gpu_vram_total_gb"] = round(device.total_memory / bytes_per_gb, 1)
	            snapshot["gpu_vram_used_gb"] = round(torch.cuda.memory_allocated(0) / bytes_per_gb, 2)
	        except Exception:
	            pass

	    self._hardware_state = snapshot

	def _attach_web(self) -> None:
	    """Instantiate the ``Web`` helper into ``self._web``.

	    When ``Web`` is ``None`` or its constructor raises, a warning is
	    printed and ``self._web`` is set to ``None``.
	    """
	    try:
	        if Web is None:
	            raise RuntimeError("CompileWeb.Web unavailable")
	        client = Web()
	    except Exception as exc:
	        print(f"[PackedLLM] Warning: Web module unavailable: {exc}")
	        client = None
	    self._web = client

	def _get_codebox(self) -> Any:
	if self._codebox is not None:
	return self._codebox

	if Box is not None:
	try:
	self._codebox = Box()
	return self._codebox
	except Exception as exc:
	print(f"[PackedLLM] Warning: Box unavailable: {exc}")

	if CodeBox is not None:
	try:
	self._codebox = CodeBox()
	return self._codebox
	except Exception as exc:
	print(f"[PackedLLM] Warning: CodeBox unavailable: {exc}")

	return None

	def _stage_kwargs(self, stage: str) -> Dict[str, Any]:
	return dict(self._STAGE_SETTINGS.get(stage, {}))

	def _inject_think_blocks(
	self,
	ctx: ExecutionContext,
	*,
	stage: str,
	target_expert: str,
	task_prompt: str,
	output_contract: str,
	constraints: Optional[List[str]] = None,
	) -> str:
	if not ctx.deep_think or target_expert == "translation_expert":
	return task_prompt

	cache_key = f"{stage}::{target_expert}::{hash(task_prompt) & 0xFFFFFFFF}"
	if cache_key in ctx.think_blocks:
	think = ctx.think_blocks[cache_key]
	else:
	constraints_text = "\n".join(f"- {c}" for c in (constraints or [])) or "- none"

	think_prompt = (
	"You are LogicExpert. Produce PRIVATE planning blocks only.\n"
	"Return ONLY <think>...</think> blocks.\n"
	"Do not answer the task. Do not produce JSON. Do not produce prose.\n"
	"Make the blocks task-specific, brief, and directly useful to the target expert.\n\n"
	f"Stage: {stage}\n"
	f"Target expert: {target_expert}\n"
	f"Task prompt:\n{task_prompt}\n\n"
	f"Output contract:\n{output_contract}\n\n"
	f"Constraints:\n{constraints_text}\n\n"
	"Generate 2 to 4 blocks covering: objective, risks, format, and the safest path."
	)

	raw = self._call_expert("logic_expert", think_prompt, **self._stage_kwargs("deep_think"))
	think = self._extract_think_blocks(raw)
	ctx.think_blocks[cache_key] = think

	if think.strip():
	return (
	"<think_blocks>\n"
	f"{think.strip()}\n"
	"</think_blocks>\n\n"
	f"{task_prompt}"
	)
	return task_prompt

	def _extract_think_blocks(self, text: str) -> str:
	if not text:
	return ""
	blocks = re.findall(
	r"<think>.*?</think>",
	text,
	flags=re.IGNORECASE \| re.DOTALL,
	)
	if blocks:
	return "\n".join(
	b.strip()
	for b in blocks
	if b.strip()
	)
	text = text.strip()
	if not text:
	return ""
	return f"<think>{text}</think>"

	def _route_allowed(self, ctx: ExecutionContext, expert: str) -> bool:
	if ctx.fast_think:
	return expert in {"head_expert", "role_expert", "translation_expert"}
	return True

	def _fast_path_response(self, ctx: ExecutionContext) -> str:

	prompt = (
	"Answer the user's request directly and as fast as possible.\n"
	"Do not call tools. Do not browse. Do not write JSON.\n"
	"Do not add meta commentary.\n\n"
	f"User prompt: {ctx.prompt}"
	)
	return self._call_expert("head_expert", prompt, **self._stage_kwargs("head_synthesize_base")).strip()

	def _fast_apply_persona(self, ctx: ExecutionContext, base_response: str) -> str:
	if not self._bot_profile:
	return base_response

	character_card = self._bot_profile.get("character_card", "")
	user_name = self._user_profile.get("name", "the user")

	prompt = (
	f"<instructions>\nRewrite the base response in the character voice while preserving facts.\n"
	f"Stay concise and accurate.\n</instructions>\n\n"
	f"<character_card>{character_card}</character_card>\n"
	f"<recipient>{user_name}</recipient>\n"
	f"<original_prompt>{ctx.prompt}</original_prompt>\n"
	f"<base_response>{base_response}</base_response>"
	)
	return self._call_expert("role_expert", prompt,
	**self._stage_kwargs("role_apply_persona")).strip() or base_response

	# ------------------------------------------------------------------
	# Forward pass
	# ------------------------------------------------------------------

	def forward(
	    self,
	    prompt: str,
	    image: Optional[str] = None,
	    tools: Optional[List[Dict[str, Any]]] = None,
	    stream: bool = False,
	    deep_think: bool = False,
	    fast_think: bool = False,
	) -> Union[str, Generator[str, None, None]]:
	    """Answer ``prompt`` through the fast path or the full staged pipeline.

	    With ``fast_think`` only a direct head answer is produced, followed by
	    a persona rewrite when a bot profile exists. Otherwise the stages run
	    in order: plan goal, consult commands, build route, execute route,
	    synthesize base, build affective state, apply persona, final review,
	    finalize.

	    Returns:
	        The final response, or ``self._stream_response(response)`` when
	        ``stream`` is true.
	    """
	    ctx = ExecutionContext(
	        prompt=prompt,
	        image=image,
	        tools=tools,
	        deep_think=deep_think,
	        fast_think=fast_think,
	    )

	    if fast_think:
	        # Fast path: minimal work, no memory, no action, no web.
	        answer = self._fast_path_response(ctx)
	        if self._bot_profile:
	            answer = self._fast_apply_persona(ctx, answer)
	    else:
	        stages = (
	            self._plan_response_goal,
	            self._consult_commands,
	            self._build_route,
	            self._execute_route,
	            self._synthesize_base,
	            self._build_affective_state,
	            self._apply_persona,
	            self._review_final_response,
	            self._finalize,
	        )
	        for run_stage in stages:
	            run_stage(ctx)
	        answer = ctx.final_response

	    return self._stream_response(answer) if stream else answer

	# ------------------------------------------------------------------
	# Expert dispatch
	# ------------------------------------------------------------------

	def _call_expert(self, key: str, *args: Any, **kwargs: Any) -> str:
	    """Invoke one expert and return its output as a string.

	    ``key`` is normalised with ``_normalise_expert_name``. When a PackedLM
	    runtime is loaded, the call goes to the runtime attribute matching the
	    expert; otherwise it goes to the object returned by
	    ``self._get_expert(key)``. Positional and keyword arguments are
	    forwarded unchanged.

	    Args:
	        key: Expert name in any spelling the normaliser accepts.
	        *args: Positional arguments for the expert (normally the prompt).
	        **kwargs: Keyword arguments for the expert (stage settings, image, ...).

	    Returns:
	        ``str(result)``, or ``""`` when the expert is unknown or missing,
	        returns ``None``, or raises (the error is printed, not propagated).
	    """
	    # Normalise key to snake_case so callers don't need to worry.
	    key = _normalise_expert_name(key)

	    runtime = self._packedlm_runtime
	    if runtime is not None:
	        # Expert key -> attribute name on the runtime object.
	        runtime_attrs = {
	            "creative_expert": "creative",
	            "code_expert": "code",
	            "logic_expert": "logic",
	            "role_expert": "role",
	            "affect_expert": "affect",
	            "head_expert": "head",
	            "vision_expert": "vision",
	            "math_expert": "math",
	            "tool_expert": "tool",
	            "translation_expert": "translation",
	            "action_expert": "action",
	            "web_expert": "web",
	        }
	        attr = runtime_attrs.get(key)
	        # Resolve only the requested attribute so a runtime that lacks one
	        # expert does not break calls to the others.
	        target = getattr(runtime, attr, None) if attr else None
	        label = f"runtime:{key}"
	    else:
	        target = self._get_expert(key)
	        label = key

	    if target is None:
	        return ""

	    try:
	        result = target(*args, **kwargs)
	        return str(result) if result is not None else ""
	    except Exception as exc:
	        print(f"[PackedLLM] _call_expert({label}) error: {exc}")
	        return ""

	# ------------------------------------------------------------------
	# Pipeline stages
	# ------------------------------------------------------------------

	def _plan_response_goal(self, ctx: ExecutionContext) -> None:
	    """Ask ``head_expert`` for a response plan and store it in ``ctx.response_goal``.

	    The expert is told to return a JSON object with intent, tone, success,
	    constraints and the three ``needs_*`` flags. If the reply does not
	    parse to a non-empty dict, a default goal built from the prompt is
	    used, so later stages can always call ``.get`` on the goal.
	    """
	    prompt = (
	        "You are a response planner.\n"
	        "Return ONLY a JSON object with exactly these keys:\n"
	        "intent (string), tone (string), success (string), constraints (array), "
	        "needs_vision (boolean), needs_web (boolean), needs_action (boolean).\n"
	        "Do not add commentary, markdown, or extra keys.\n"
	        "Always respond in English unless the task is explicitly a translation task.\n"
	        "TranslationExpert is Chinese→English only; only flag translation-related routing when "
	        "the source text is actually Chinese.\n\n"
	        f"User prompt: {ctx.prompt}"
	    )
	    prompt = self._inject_think_blocks(
	        ctx,
	        stage="head_plan_response_goal",
	        target_expert="head_expert",
	        task_prompt=prompt,
	        output_contract="JSON object with intent/tone/success/constraints/needs_vision/needs_web/needs_action.",
	    )
	    raw = self._call_expert(
	        "head_expert",
	        prompt,
	        image=ctx.image if ctx.image else None,
	        **self._stage_kwargs("head_plan_response_goal"),
	    )

	    parsed = _parse_json_safe(raw)
	    if isinstance(parsed, dict) and parsed:
	        ctx.response_goal = parsed
	        return

	    # Unparseable, empty, or non-object JSON (e.g. a list): use defaults.
	    ctx.response_goal = {
	        "intent": ctx.prompt,
	        "tone": "helpful",
	        "success": "Answer the user helpfully.",
	        "constraints": [],
	        "needs_vision": False,
	        "needs_web": False,
	        "needs_action": False,
	    }

	def _consult_commands(self, ctx: ExecutionContext) -> None:
	if ctx.fast_think or self._memory_bank is None:
	ctx.command_context = {"executed": [], "results": [], "coverage": "none"}
	return

	command_context = {
	"executed": [],
	"results": [],
	"coverage": "none",
	}

	gator = getattr(self._memory_bank, "gator", None)
	if gator is None:
	ctx.command_context = command_context
	return

	maybe_execute = getattr(gator, "maybe_execute_commands", None)
	if callable(maybe_execute):
	try:
	result = maybe_execute(ctx.prompt, ctx.response_goal)
	if isinstance(result, dict):
	command_context.update(result)
	except Exception as exc:
	command_context["results"].append({"error": str(exc)})
	else:
	try:
	if hasattr(gator, "process_actions"):
	routed = gator.process_actions(
	ctx.prompt,
	user_id=self.user_id or "",
	bot_id=self.bot_id or "",
	history="",
	location="",
	time_date="",
	)
	if isinstance(routed, dict):
	command_context["executed"] = routed.get("commands", [])
	command_context["results"] = routed.get("retrieved_data", {})
	if routed.get("commands") or routed.get("retrieved_data"):
	command_context["coverage"] = "partial"
	except Exception as exc:
	command_context["results"].append({"error": str(exc)})

	if command_context["coverage"] == "none":
	executed = command_context.get("executed") or []
	results = command_context.get("results") or []
	if executed or results:
	command_context["coverage"] = "partial"

	ctx.command_context = command_context

	def _build_route(self, ctx: ExecutionContext) -> None:
	    """Decide which experts run, in which order, and store it in ``ctx.route``.

	    In fast-think mode the route is a head step, plus a role step when a
	    bot profile exists. Otherwise ``head_expert`` is asked for a JSON array
	    of steps; entries that are not dicts or name a disallowed expert are
	    dropped. If nothing usable remains, a fallback route is derived from
	    the ``needs_*`` flags of the response goal and always ends with a head
	    step. Consecutive steps for the same expert are collapsed, except for
	    ``translation_expert``.
	    """
	    if ctx.fast_think:
	        ctx.route = [
	            RouteStep(
	                expert="head_expert",
	                sub_prompt=ctx.prompt,
	                goal="Produce the fastest useful answer possible.",
	            )
	        ]
	        if self._bot_profile:
	            ctx.route.append(
	                RouteStep(expert="role_expert", sub_prompt=ctx.prompt, goal="Apply persona only.")
	            )
	        return

	    router_prompt = "".join([
	        "You are a response router.\n",
	        "Return ONLY a JSON array of step objects.\n",
	        "Each step object must include: expert (string), sub_prompt (string), goal (string), ",
	        "and optional kwargs (object).\n",
	        "Use the fewest steps needed. Prefer web_expert for live information, action_expert for ",
	        "executable tasks, and head_expert for planning/validation.\n",
	        "Always write sub_prompt and goal in English.\n",
	        "Do not include duplicate or redundant steps.\n\n",
	        f"Response goal: {json.dumps(ctx.response_goal, ensure_ascii=False, default=str)}\n",
	        f"Command context: {json.dumps(ctx.command_context, ensure_ascii=False, default=str)}\n",
	        f"Original prompt: {ctx.prompt}\n",
	        f"Available experts: {', '.join(self.REQUIRED_EXPERTS)}",
	    ])
	    router_prompt = self._inject_think_blocks(
	        ctx,
	        stage="head_build_route",
	        target_expert="head_expert",
	        task_prompt=router_prompt,
	        output_contract="JSON array of routing steps.",
	    )

	    reply = self._call_expert("head_expert", router_prompt, **self._stage_kwargs("head_build_route"))
	    proposed = _parse_json_safe(reply)

	    planned: List[RouteStep] = []
	    for entry in (proposed if isinstance(proposed, list) else []):
	        if not isinstance(entry, dict):
	            continue
	        name = _normalise_expert_name(entry.get("expert", "head_expert"))
	        if self._route_allowed(ctx, name):
	            extra = entry.get("kwargs", {})
	            planned.append(
	                RouteStep(
	                    expert=name,
	                    sub_prompt=str(entry.get("sub_prompt", ctx.prompt)),
	                    goal=str(entry.get("goal", "Complete the sub-task.")),
	                    kwargs=extra if isinstance(extra, dict) else {},
	                )
	            )

	    if not planned:
	        # Router gave nothing usable: derive a route from the goal flags.
	        flags = ctx.response_goal if isinstance(ctx.response_goal, dict) else {}

	        if flags.get("needs_web") and self._route_allowed(ctx, "web_expert"):
	            planned.append(
	                RouteStep(
	                    expert="web_expert",
	                    sub_prompt=ctx.prompt,
	                    goal="Gather and synthesize fresh external information.",
	                )
	            )
	        if flags.get("needs_action") and self._route_allowed(ctx, "action_expert"):
	            planned.append(
	                RouteStep(
	                    expert="action_expert",
	                    sub_prompt=ctx.prompt,
	                    goal="Execute the requested action or code workflow.",
	                )
	            )
	        if flags.get("needs_vision") and ctx.image and self._route_allowed(ctx, "vision_expert"):
	            planned.append(
	                RouteStep(
	                    expert="vision_expert",
	                    sub_prompt=ctx.prompt,
	                    goal="Interpret the provided image.",
	                    kwargs={"image": ctx.image},
	                )
	            )
	        planned.append(
	            RouteStep(
	                expert="head_expert",
	                sub_prompt=ctx.prompt,
	                goal="Produce a complete, helpful response.",
	            )
	        )

	    # Remove redundant consecutive steps.
	    collapsed: List[RouteStep] = []
	    for candidate in planned:
	        repeats_previous = bool(collapsed) and collapsed[-1].expert == candidate.expert
	        if repeats_previous and candidate.expert != "translation_expert":
	            continue
	        collapsed.append(candidate)
	    ctx.route = collapsed

	def _execute_route(self, ctx: ExecutionContext) -> None:
	i = 0
	while i < len(ctx.route):
	step = ctx.route[i]

	if step.expert == "action_expert":
	result = self._run_action_pipeline(ctx, step)
	ctx.step_results.append({
	"expert": step.expert,
	"sub_prompt": step.sub_prompt,
	"result": result,
	"passed": bool(result.get("passed", False)),
	"action": result.get("action", "action_pipeline"),
	})
	i += 1
	continue

	if step.expert == "web_expert":
	result = self._run_web_pipeline(ctx, step)
	ctx.step_results.append({
	"expert": step.expert,
	"sub_prompt": step.sub_prompt,
	"result": result,
	"passed": bool(result.get("passed", False)),
	"action": result.get("action", "web_pipeline"),
	})
	i += 1
	continue

	retries = 0
	while True:
	result = self._execute_step(step, ctx)
	passed = self._evaluate_step(result, step, ctx)

	if passed:
	ctx.step_results.append({
	"expert": step.expert,
	"sub_prompt": step.sub_prompt,
	"result": result,
	"passed": True,
	})
	break

	retries += 1
	action = self._retry_or_reroute(step, retries, result, ctx)

	if action == "retry" and retries < self.MAX_STEP_RETRIES:
	continue

	if action == "detour":
	new_steps = self._plan_detour(step, result, ctx)
	ctx.route = ctx.route[:i + 1] + new_steps + ctx.route[i + 1:]
	ctx.step_results.append({
	"expert": step.expert,
	"sub_prompt": step.sub_prompt,
	"result": result,
	"passed": False,
	"action": "detour",
	})
	break

	ctx.step_results.append({
	"expert": step.expert,
	"sub_prompt": step.sub_prompt,
	"result": result,
	"passed": False,
	"action": "skip",
	})
	break

	i += 1

	def _execute_step(self, step: RouteStep, ctx: ExecutionContext) -> str:
	kwargs = dict(step.kwargs)

	if step.expert in ("head_expert", "vision_expert") and ctx.image:
	kwargs.setdefault("image", ctx.image)
	if step.expert == "tool_expert" and ctx.tools:
	kwargs.setdefault("tools", ctx.tools)

	if step.expert == "translation_expert":
	# Translation stays direct; no deep-think wrapper.
	kwargs = {}
	if not CHINESE_RE.search(step.sub_prompt or "") and not CHINESE_RE.search(ctx.prompt or ""):
	return step.sub_prompt

	prompt_text = step.sub_prompt
	if ctx.deep_think and step.expert != "translation_expert":
	prompt_text = self._inject_think_blocks(
	ctx,
	stage=f"{step.expert}:{step.goal}",
	target_expert=step.expert,
	task_prompt=step.sub_prompt,
	output_contract="Task-specific expert response.",
	constraints=[step.goal],
	)

	return self._call_expert(step.expert, prompt_text, **kwargs)

	def _evaluate_step(self, result: str, step: RouteStep, ctx: ExecutionContext) -> bool:
	    """Ask ``affect_expert`` whether ``result`` meets ``step.goal``.

	    Only the first 500 characters of the result are shown to the expert.
	    The step passes unless the reply parses to a dict whose ``pass`` value
	    is falsy.
	    """
	    question = "".join([
	        "Evaluate whether the following result meets the stated goal. ",
	        "Return ONLY JSON: {\"pass\": true/false, \"reason\": \"...\"}.\n\n",
	        f"Goal: {step.goal}\n",
	        f"Result: {result[:500]}",
	    ])
	    verdict = _parse_json_safe(self._call_expert("affect_expert", question))
	    if not isinstance(verdict, dict):
	        return True
	    return bool(verdict.get("pass", True))

	def _retry_or_reroute(self, step: RouteStep, retry_count: int, result: str, ctx: ExecutionContext) -> str:
	    """Let ``head_expert`` choose how to handle a step that failed evaluation.

	    ``"retry"`` is offered only while ``retry_count`` is below
	    ``MAX_STEP_RETRIES``; ``"detour"`` and ``"skip"`` are always offered.

	    Returns:
	        The chosen option, or ``"skip"`` when the reply is not a dict or
	        names something that was not offered.
	    """
	    if retry_count < self.MAX_STEP_RETRIES:
	        options = ["retry", "detour", "skip"]
	    else:
	        options = ["detour", "skip"]

	    question = "".join([
	        "A pipeline step has failed its quality check. Decide what to do.\n",
	        f"Failed expert: {step.expert}\n",
	        f"Sub-prompt: {step.sub_prompt}\n",
	        f"Step goal: {step.goal}\n",
	        f"Result so far: {result[:300]}\n",
	        f"Retry count: {retry_count}\n",
	        f"Available options: {options}\n",
	        "Return ONLY JSON: {\"action\": \"<option>\"}",
	    ])
	    decision = _parse_json_safe(self._call_expert("head_expert", question))
	    if isinstance(decision, dict):
	        choice = decision.get("action", "skip")
	        if choice in options:
	            return choice
	    return "skip"

	def _plan_detour(self, failed_step: RouteStep, result: str, ctx: ExecutionContext) -> List[RouteStep]:
	    """Ask ``head_expert`` for replacement steps after ``failed_step`` failed.

	    Args:
	        failed_step: The step that did not pass evaluation.
	        result: Its output; only the first 300 characters are sent.
	        ctx: Current execution context (supplies the original prompt).

	    Returns:
	        The steps parsed from the expert's JSON array (non-dict entries
	        are dropped), or a single ``head_expert`` fallback step when the
	        reply is not a list.
	    """
	    meta = (
	        "A pipeline step failed and a detour is needed. "
	        "Return ONLY a JSON array of replacement step objects "
	        "(each with: expert, sub_prompt, goal, kwargs optional).\n\n"
	        f"Failed step expert: {failed_step.expert}\n"
	        f"Failed step goal: {failed_step.goal}\n"
	        f"Partial result: {result[:300]}\n"
	        f"Original prompt: {ctx.prompt}\n"
	        f"Available experts: {', '.join(self.REQUIRED_EXPERTS)}"
	    )
	    raw = self._call_expert("head_expert", meta)
	    steps_raw = _parse_json_safe(raw)

	    if not isinstance(steps_raw, list):
	        return [
	            RouteStep(
	                expert="head_expert",
	                sub_prompt=ctx.prompt,
	                goal="Produce a fallback response.",
	            )
	        ]

	    detour: List[RouteStep] = []
	    for entry in steps_raw:
	        if not isinstance(entry, dict):
	            continue
	        extra = entry.get("kwargs", {})
	        detour.append(
	            RouteStep(
	                expert=_normalise_expert_name(entry.get("expert", "head_expert")),
	                # Coerce to str as _build_route does: later stages slice
	                # and regex-search these fields.
	                sub_prompt=str(entry.get("sub_prompt", ctx.prompt)),
	                goal=str(entry.get("goal", "Complete the sub-task.")),
	                kwargs=extra if isinstance(extra, dict) else {},
	            )
	        )
	    return detour

	def _synthesize_base(self, ctx: ExecutionContext) -> None:
	results_summary = "\n".join(
	f"[{r['expert']}]: {r['result']}"
	for r in ctx.step_results
	if r.get("result") is not None
	)

	prompt = (
	"You are synthesizing the content-only base response.\n"
	"Use the expert results and original prompt to produce a complete, concise answer.\n"
	"Do NOT apply any persona or character voice.\n"
	"Do NOT return JSON.\n"
	"Always respond in English unless the task is explicitly a translation task.\n\n"
	f"Original prompt: {ctx.prompt}\n\n"
	f"Expert results:\n{results_summary}\n\n"
	f"Response goal: {json.dumps(ctx.response_goal, ensure_ascii=False, default=str)}"
	)
	prompt = self._inject_think_blocks(
	ctx,
	stage="head_synthesize_base",
	target_expert="head_expert",
	task_prompt=prompt,
	output_contract="Plain natural-language answer only.",
	)
	raw = self._call_expert("head_expert", prompt, **self._stage_kwargs("head_synthesize_base"))
	ctx.base_response = raw.strip() if raw.strip() else (results_summary or ctx.prompt)

	def _build_affective_state(self, ctx: ExecutionContext) -> None:
	if ctx.fast_think or (not self.bot_id and not self.user_id):
	ctx.affective_state = {}
	return

	prompt = (
	"You are an affective state module.\n"
	"Return ONLY a JSON object with two keys: emotional_state and physical_state.\n"
	"emotional_state must include emotion, mood, sentiment, and disposition_toward_user.\n"
	"physical_state should reflect the provided hardware metrics.\n\n"
	f"Conversation summary: User said: {ctx.prompt}\n"
	f"Base response: {ctx.base_response[:500]}\n"
	f"User profile: {json.dumps(self._user_profile, ensure_ascii=False, default=str)}\n"
	f"Bot profile: {json.dumps(self._bot_profile, ensure_ascii=False, default=str)}\n"
	f"Hardware state: {json.dumps(self._hardware_state, ensure_ascii=False, default=str)}"
	)
	raw = self._call_expert("affect_expert", prompt, **self._stage_kwargs("affect_build_affective_state"))
	ctx.affective_state = _parse_json_safe(raw) or {}

	def _apply_persona(self, ctx: ExecutionContext) -> None:
	if not self._bot_profile:
	ctx.final_response = ctx.base_response
	return

	character_card = self._bot_profile.get("character_card", "")
	user_name = self._user_profile.get("name", "the user")

	prompt = (
	f"You are {character_card}\n\n"
	f"You are responding to {user_name}.\n\n"
	f"Original prompt: {ctx.prompt}\n\n"
	f"Base response to rewrite: {ctx.base_response}\n\n"
	f"Your current emotional state: {json.dumps(ctx.affective_state.get('emotional_state', {}), ensure_ascii=False, default=str)}\n"
	f"Your current physical state: {json.dumps(ctx.affective_state.get('physical_state', {}), ensure_ascii=False, default=str)}\n\n"
	"Rewrite the base response in your own voice, naturally, as yourself.\n"
	"Keep all factual content intact.\n"
	"IMPORTANT: Respond in English unless the character card explicitly specifies another language."
	)
	prompt = self._inject_think_blocks(
	ctx,
	stage="role_apply_persona",
	target_expert="role_expert",
	task_prompt=prompt,
	output_contract="Persona-rewritten natural-language answer.",
	)

	rewritten = self._call_expert("role_expert", prompt, character_card=character_card,
	**self._stage_kwargs("role_apply_persona"))
	if self._validate_response(rewritten, ctx):
	ctx.final_response = rewritten
	else:
	ctx.final_response = ctx.base_response

	def _review_final_response(self, ctx: ExecutionContext) -> None:
	if ctx.fast_think:
	ctx.final_review = {"verdict": "accept", "reason": "fast_think path", "revised_response": None}
	return

	prompt = (
	"You are performing the final review.\n"
	"Return ONLY a JSON object with keys:\n"
	"verdict (accept\|revise\|reject), reason (string), revised_response (string\|null),\n"
	"memory_facts (array), user_profile_updates (object), bot_profile_updates (object).\n"
	"Do not add extra keys or commentary.\n"
	"The revised_response must be in English unless the task explicitly required another language.\n\n"
	f"Original prompt: {ctx.prompt}\n"
	f"Response goal: {json.dumps(ctx.response_goal, ensure_ascii=False, default=str)}\n"
	f"Base response: {ctx.base_response}\n"
	f"Persona response: {ctx.final_response}\n"
	f"Affective state: {json.dumps(ctx.affective_state, ensure_ascii=False, default=str)}\n"
	f"User profile: {json.dumps(self._user_profile, ensure_ascii=False, default=str)}\n"
	f"Bot profile: {json.dumps(self._bot_profile, ensure_ascii=False, default=str)}\n"
	f"Command context: {json.dumps(ctx.command_context, ensure_ascii=False, default=str)}"
	)
	prompt = self._inject_think_blocks(
	ctx,
	stage="head_review_final_response",
	target_expert="head_expert",
	task_prompt=prompt,
	output_contract="JSON review object.",
	)

	raw = self._call_expert("head_expert", prompt, **self._stage_kwargs("head_review_final_response"))
	review = _parse_json_safe(raw) or {}
	if not isinstance(review, dict):
	review = {}

	verdict = str(review.get("verdict", "accept")).lower()
	revised = review.get("revised_response")

	if verdict == "revise" and isinstance(revised, str) and revised.strip():
	ctx.final_response = revised.strip()
	elif verdict == "reject":
	ctx.final_response = ctx.base_response

	ctx.final_review = review

	def _finalize(self, ctx: ExecutionContext) -> None:
	if ctx.fast_think:
	return
	self._write_memory(ctx)
	self._update_user_profile(ctx)
	self._update_bot_profile(ctx)
	# ------------------------------------------------------------------
	# Action pipeline
	# ------------------------------------------------------------------

	def _run_action_pipeline(self, ctx: ExecutionContext, step: RouteStep) -> Dict[str, Any]:
	codebox = self._get_codebox()
	if codebox is None:
	return {
	"passed": False,
	"action": "unavailable",
	"output": "CodeBox is unavailable.",
	}

	action_request = step.sub_prompt or ctx.prompt
	imports_map = {
	"PIL": "Pillow",
	"bs4": "beautifulsoup4",
	"cv2": "opencv-python",
	"yaml": "PyYAML",
	"sklearn": "scikit-learn",
	}

	last_result: Dict[str, Any] = {"passed": False, "action": "unavailable"}

	for attempt in range(1, self.MAX_ACTION_ATTEMPTS + 1):
	plan_prompt = self._prompt_action_planning(ctx, action_request, step.goal)
	raw_plan = self._call_expert(
	"logic_expert",
	plan_prompt,
	mode="deep_then_answer",
	**self._stage_kwargs("logic_action_planning"),
	)

	plan = _parse_json_safe(raw_plan) or {}
	if not isinstance(plan, dict):
	plan = {}

	architecture = plan.get("architecture", {})
	if not isinstance(architecture, dict) or "main" not in architecture:
	architecture = {
	"main": {
	"signature": "def main():",
	"docstring": "Entry point.",
	"instructions": action_request,
	}
	}
	plan["architecture"] = architecture

	imports = plan.get("imports", [])
	if not isinstance(imports, list):
	imports = []

	required_pkgs: List[str] = []
	for imp in imports:
	if not isinstance(imp, str):
	continue
	root = imp.split(" as ", 1)[0].strip().split(".", 1)[0]
	required_pkgs.append(imports_map.get(root, root))

	venv_id = f"action_{abs(hash(action_request)) % 100000}_{attempt}"

	try:
	if required_pkgs:
	codebox.create_venv(venv_id, requirements=required_pkgs)
	else:
	codebox.create_venv(venv_id)

	generation_prompt = self._prompt_code_generation(ctx, action_request, step.goal, plan)
	generated_script = self._call_expert(
	"code_expert",
	generation_prompt,
	**self._stage_kwargs("code_action_generation"),
	)
	if not isinstance(generated_script, str) or not generated_script.strip():
	raise RuntimeError("CodeExpert returned an empty script.")

	generated_script = _extract_python_code(generated_script)
	if not generated_script.strip():
	raise RuntimeError("CodeExpert returned a script with no extractable valid Python.")

	if "request_report.txt" not in generated_script:
	generated_script += (
	"\n\nif __name__ == '__main__':\n"
	" with open('request_report.txt', 'w', encoding='utf-8') as f:\n"
	" f.write('Action completed.')\n"
	" print('Action completed.')\n"
	)

	if hasattr(codebox, "save_script"):
	codebox.save_script(f"action_{attempt}", venv_id, generated_script)

	run_result = codebox.run_code(
	generated_script,
	venv_id=venv_id,
	requirements=None,
	timeout=120,
	max_ram_mb=4096,
	)

	review_prompt = self._prompt_action_review(ctx, action_request, plan, run_result)
	review_raw = self._call_expert(
	"head_expert",
	review_prompt,
	**self._stage_kwargs("head_action_review"),
	)
	review = _parse_json_safe(review_raw) or {}
	verdict = str(review.get("verdict", "retry")).lower()
	feedback = review.get("feedback", "")

	if not (run_result.get("success") and verdict == "success"):
	print(f"[PackedLLM] action attempt {attempt} did not pass:")
	print(f" run_result.success={run_result.get('success')} verdict={verdict}")
	print(f" stdout: {str(run_result.get('stdout', ''))[:600]}")
	print(f" stderr: {str(run_result.get('stderr', ''))[:600]}")
	print(f" exit_code: {run_result.get('exit_code')}")
	print(f" review feedback: {feedback}")

	if run_result.get("success") and verdict == "success":
	return {
	"passed": True,
	"action": "success",
	"attempt": attempt,
	"plan": plan,
	"result": run_result,
	"review": review,
	"output": run_result.get("stdout", "") or "Action completed.",
	}

	last_result = {
	"passed": False,
	"action": verdict if verdict in {"retry", "abandon"} else "retry",
	"attempt": attempt,
	"plan": plan,
	"result": run_result,
	"review": review,
	"feedback": feedback,
	"script": generated_script,
	}

	if verdict == "abandon":
	break

	except Exception as exc:
	last_result = {
	"passed": False,
	"action": "retry" if attempt < self.MAX_ACTION_ATTEMPTS else "abandon",
	"attempt": attempt,
	"error": str(exc),
	"plan": plan,
	}

	if attempt < self.MAX_ACTION_ATTEMPTS:
	repair_prompt = self._prompt_action_repair(ctx, action_request, plan, last_result)
	_ = self._call_expert(
	"logic_expert",
	repair_prompt,
	mode="deep_then_answer",
	**self._stage_kwargs("logic_action_repair"),
	)

	return last_result

	# ------------------------------------------------------------------
	# Web pipeline
	# ------------------------------------------------------------------

	def _run_web_pipeline(self, ctx: ExecutionContext, step: RouteStep) -> Dict[str, Any]:
	if self._web is None:
	self._attach_web()
	if self._web is None:
	return {
	"passed": False,
	"action": "unavailable",
	"output": "Web module is unavailable.",
	}

	prompt = step.sub_prompt or ctx.prompt
	overall: Dict[str, Any] = {
	"passed": False,
	"action": "partial",
	"rounds": [],
	}

	queries_prompt = self._prompt_web_queries(ctx, prompt)
	raw_queries = self._call_expert(
	"head_expert",
	queries_prompt,
	**self._stage_kwargs("head_web_queries"),
	)
	subqueries = _parse_json_safe(raw_queries)
	if not isinstance(subqueries, list) or not subqueries:
	subqueries = [{"query": prompt, "description": "Direct search", "deep_search": False}]

	rounds = 0
	remaining_subqueries = subqueries

	while rounds < self.MAX_WEB_ROUNDS and remaining_subqueries:
	round_results = []
	for item in remaining_subqueries:
	if not isinstance(item, dict):
	continue

	query = str(item.get("query", prompt))
	deep_search = bool(item.get("deep_search", False))

	try:
	raw = self._web.search(query, deep_search=deep_search)
	except TypeError:
	raw = self._web.search(query)
	except Exception as exc:
	raw = f"WEB_ERROR: {exc}"

	text = raw if isinstance(raw, str) else _json_dumps(raw)
	if len(text) > 8000 and hasattr(self._web, "summarize"):
	try:
	text = self._web.summarize(text)
	except Exception:
	pass

	answer_prompt = self._prompt_web_answer_subquery(ctx, query, text, prompt)
	answer = self._call_expert(
	"head_expert",
	answer_prompt,
	**self._stage_kwargs("head_web_answer_subquery"),
	)

	round_results.append({
	"query": query,
	"description": item.get("description", ""),
	"evidence": text[:4000],
	"answer": answer,
	"deep_search": deep_search,
	})

	overall["rounds"].append(round_results)

	review_prompt = self._prompt_web_review(ctx, prompt, round_results)
	review_raw = self._call_expert(
	"head_expert",
	review_prompt,
	**self._stage_kwargs("head_web_review"),
	)
	review = _parse_json_safe(review_raw) or {}
	verdict = str(review.get("verdict", "sufficient")).lower()

	if verdict == "more_info":
	remaining_subqueries = review.get("new_subqueries", [])
	if not isinstance(remaining_subqueries, list):
	remaining_subqueries = []
	rounds += 1
	continue

	if verdict == "abandon":
	break

	overall["passed"] = True
	overall["action"] = "sufficient"
	break

	if overall["passed"]:
	synthesis_prompt = self._prompt_web_synthesis(ctx, prompt, overall["rounds"])
	synthesis = self._call_expert(
	"head_expert",
	synthesis_prompt,
	**self._stage_kwargs("head_web_synthesis"),
	)
	overall["output"] = synthesis
	else:
	overall["output"] = "Partial web research completed."

	return overall

	# ------------------------------------------------------------------
	# Validation / finalisation helpers
	# ------------------------------------------------------------------

	def _validate_response(self, response: str, ctx: ExecutionContext) -> bool:
	    """Ask ``head_expert`` whether ``response`` satisfies the response goal.

	    The expert sees the goal's ``success`` criterion, the character card
	    and the first 600 characters of the response.

	    Returns:
	        ``False`` only when the reply parses to a dict whose ``valid``
	        value is falsy; ``True`` in every other case.
	    """
	    # A goal that is not a dict has no criterion; use the default wording,
	    # matching the isinstance guard _build_route applies to the same field.
	    goal = ctx.response_goal if isinstance(ctx.response_goal, dict) else {}

	    meta = (
	        "Validate this response against the criteria below. "
	        "Return ONLY JSON: {\"valid\": true/false, \"reason\": \"...\"}.\n\n"
	        f"Success criterion: {goal.get('success', 'Be helpful.')}\n"
	        f"Character card: {self._bot_profile.get('character_card', 'None')}\n"
	        f"Response to validate: {response[:600]}"
	    )
	    raw = self._call_expert("head_expert", meta)
	    parsed = _parse_json_safe(raw)
	    if isinstance(parsed, dict):
	        return bool(parsed.get("valid", True))
	    return True

	def _extract_memory_facts(self, ctx: ExecutionContext) -> List[str]:
	facts: List[str] = []
	review_facts = ctx.final_review.get("memory_facts", [])
	if isinstance(review_facts, list):
	facts.extend([f for f in review_facts if isinstance(f, str) and f.strip()])

	for item in ctx.step_results:
	if not isinstance(item, dict):
	continue
	result = item.get("result")
	if isinstance(result, dict) and result.get("output"):
	facts.append(str(result["output"]))
	elif isinstance(result, str) and result.strip():
	facts.append(result.strip()[:300])

	deduped: List[str] = []
	seen = set()
	for fact in facts:
	key = fact.strip().lower()
	if key and key not in seen:
	seen.add(key)
	deduped.append(fact.strip())
	return deduped[:20]

	def _update_user_profile(self, ctx: ExecutionContext) -> None:
	if self._memory_bank is None or not self.user_id:
	return

	updates = ctx.final_review.get("user_profile_updates", {})
	if not isinstance(updates, dict) or not updates:
	return

	try:
	self._user_profile.update(updates)
	if hasattr(self._memory_bank, "set_profile"):
	_safe_call(self._memory_bank, "set_profile", "user", self.user_id, self._user_profile)
	elif hasattr(self._memory_bank.gator, "set_profile"):
	_safe_call(self._memory_bank.gator, "set_profile", "user", self.user_id, self._user_profile)
	except Exception as exc:
	print(f"[PackedLLM] _update_user_profile failed (non-fatal): {exc}")

	def _update_bot_profile(self, ctx: ExecutionContext) -> None:
	if self._memory_bank is None or not self.bot_id:
	return

	updates = ctx.final_review.get("bot_profile_updates", {})
	if not isinstance(updates, dict):
	updates = {}
	emotional = ctx.affective_state.get("emotional_state", {})
	physical = ctx.affective_state.get("physical_state", {})

	try:
	self._bot_profile.update(updates)
	if emotional:
	self._bot_profile["last_emotional_state"] = emotional
	if physical:
	self._bot_profile["last_physical_state"] = physical

	if hasattr(self._memory_bank, "set_profile"):
	_safe_call(self._memory_bank, "set_profile", "bot", self.bot_id, self._bot_profile)
	elif hasattr(self._memory_bank.gator, "set_profile"):
	_safe_call(self._memory_bank.gator, "set_profile", "bot", self.bot_id, self._bot_profile)
	except Exception as exc:
	print(f"[PackedLLM] _update_bot_profile failed (non-fatal): {exc}")

	def _write_memory(self, ctx: ExecutionContext) -> None:
	if self._memory_bank is None:
	return

	facts = self._extract_memory_facts(ctx)
	if not facts:
	return

	try:
	for fact in facts:
	if hasattr(self._memory_bank, "store"):
	_safe_call(
	self._memory_bank,
	"store",
	text=fact,
	metadata={
	"bot_id": self.bot_id,
	"user_id": self.user_id,
	"timestamp": time.time(),
	},
	)
	elif hasattr(self._memory_bank, "store_knowledge"):
	_safe_call(
	self._memory_bank,
	"store_knowledge",
	[fact],
	tags=["conversation_memory"],
	source="PackedLLM",
	importance=0.7,
	)
	except Exception as exc:
	print(f"[PackedLLM] _write_memory failed (non-fatal): {exc}")

	def _stream_response(self, text: str) -> Generator[str, None, None]:
	for token in text.split(" "):
	yield token + " "

	def _prompt_action_planning(self, ctx: ExecutionContext, action_request: str, step_goal: str) -> str:
	prompt = (
	"You are LogicExpert planning an executable workflow.\n"
	"Return ONLY JSON with keys: thought, architecture, imports, global.\n"
	"architecture must include a main entry point.\n"
	"Be concise, deterministic, and implementation-oriented.\n\n"
	f"Action request: {action_request}\n"
	f"Original prompt: {ctx.prompt}\n"
	f"Goal: {step_goal}"
	)
	return self._inject_think_blocks(
	ctx,
	stage="logic_action_planning",
	target_expert="logic_expert",
	task_prompt=prompt,
	output_contract="JSON object with thought, architecture, imports, global.",
	)

	def _prompt_code_generation(self, ctx: ExecutionContext, action_request: str, step_goal: str,
	plan: Dict[str, Any]) -> str:
	prompt = (
	"You are CodeExpert writing a complete, runnable Python script.\n"
	"Requirements:\n"
	"1) Complete the user's request.\n"
	"2) Create request_report.txt in the working directory.\n"
	"3) Write the final result into that file and print it.\n"
	"4) Use only requested imports and stdlib unless absolutely necessary.\n"
	"5) The script must be self-contained and executable as-is.\n"
	"6) Output ONLY raw Python source code, no fences, no commentary.\n\n"
	f"Action request: {action_request}\n"
	f"Original prompt: {ctx.prompt}\n"
	f"Goal: {step_goal}\n"
	f"Plan: {json.dumps(plan, ensure_ascii=False, default=str)}"
	)
	return self._inject_think_blocks(
	ctx,
	stage="code_action_generation",
	target_expert="code_expert",
	task_prompt=prompt,
	output_contract="Raw Python source code only.",
	)

	def _prompt_action_review(self, ctx: ExecutionContext, action_request: str, plan: Dict[str, Any],
	run_result: Dict[str, Any]) -> str:
	prompt = (
	"You are HeadExpert reviewing an ActionExpert run.\n"
	"Return ONLY JSON with keys: verdict (success\|retry\|abandon) and feedback.\n"
	"Be strict and prefer retry only for fixable failures.\n\n"
	f"Original request: {action_request}\n"
	f"Plan: {json.dumps(plan, ensure_ascii=False, default=str)}\n"
	f"Stdout: {run_result.get('stdout', '')}\n"
	f"Stderr: {run_result.get('stderr', '')}\n"
	f"Exit code: {run_result.get('exit_code', '')}\n"
	f"Success: {run_result.get('success', False)}"
	)
	return self._inject_think_blocks(
	ctx,
	stage="head_action_review",
	target_expert="head_expert",
	task_prompt=prompt,
	output_contract="JSON object with verdict and feedback.",
	)

	def _prompt_action_repair(self, ctx: ExecutionContext, action_request: str, previous_plan: Dict[str, Any],
	failure_details: Dict[str, Any]) -> str:
	prompt = (
	"You are LogicExpert repairing a failed ActionExpert plan.\n"
	"Return ONLY JSON with keys: changed_functions and notes.\n"
	"Focus on the smallest targeted fix.\n\n"
	f"Original request: {action_request}\n"
	f"Previous plan: {json.dumps(previous_plan, ensure_ascii=False, default=str)}\n"
	f"Failure details: {json.dumps(failure_details, ensure_ascii=False, default=str)}"
	)
	return self._inject_think_blocks(
	ctx,
	stage="logic_action_repair",
	target_expert="logic_expert",
	task_prompt=prompt,
	output_contract="JSON repair object.",
	)

	def _prompt_web_queries(self, ctx: ExecutionContext, prompt: str) -> str:
	prompt_text = (
	"Generate a concise list of web search subqueries.\n"
	"Return ONLY a JSON array of objects with keys: query, description, deep_search.\n"
	"Use the fewest queries needed and keep them high-signal.\n"
	"All query and description fields must be in English.\n\n"
	f"Prompt: {prompt}\n"
	f"Response goal: {json.dumps(ctx.response_goal, ensure_ascii=False, default=str)}"
	)
	return self._inject_think_blocks(
	ctx,
	stage="head_web_queries",
	target_expert="head_expert",
	task_prompt=prompt_text,
	output_contract="JSON array of web search queries.",
	)

	def _prompt_web_answer_subquery(self, ctx: ExecutionContext, query: str, evidence: str, prompt: str) -> str:
	prompt_text = (
	"Answer the web subquery using only the provided evidence.\n"
	"Return a concise plain-text answer grounded in the evidence.\n"
	"Do NOT return JSON, code, or emotion metadata.\n\n"
	f"Subquery: {query}\n"
	f"Evidence: {evidence[:8000]}\n"
	f"Prompt: {prompt}"
	)
	return self._inject_think_blocks(
	ctx,
	stage="head_web_answer_subquery",
	target_expert="head_expert",
	task_prompt=prompt_text,
	output_contract="Concise evidence-grounded answer.",
	)

	def _prompt_web_review(self, ctx: ExecutionContext, prompt: str, round_results: List[Dict[str, Any]]) -> str:
	prompt_text = (
	"Review the web research result and decide whether more information is needed.\n"
	"Return ONLY JSON with keys: verdict (sufficient\|more_info\|abandon) and new_subqueries (array).\n"
	"Return an empty array when no additional searches are needed.\n\n"
	f"Prompt: {prompt}\n"
	f"Round results: {json.dumps(round_results, ensure_ascii=False, default=str)}\n"
	f"Response goal: {json.dumps(ctx.response_goal, ensure_ascii=False, default=str)}"
	)
	return self._inject_think_blocks(
	ctx,
	stage="head_web_review",
	target_expert="head_expert",
	task_prompt=prompt_text,
	output_contract="JSON review object for web research.",
	)

	def _prompt_web_synthesis(self, ctx: ExecutionContext, prompt: str,
	research_rounds: List[List[Dict[str, Any]]]) -> str:
	prompt_text = (
	"Synthesize the final web answer from the research results.\n"
	"Return a direct, readable answer in English with no extra commentary.\n\n"
	f"Prompt: {prompt}\n"
	f"Research rounds: {json.dumps(research_rounds, ensure_ascii=False, default=str)}"
	)
	return self._inject_think_blocks(
	ctx,
	stage="head_web_synthesis",
	target_expert="head_expert",
	task_prompt=prompt_text,
	output_contract="Final synthesized web answer.",
	)
	# ------------------------------------------------------------------
	# Public expert convenience methods
	# ------------------------------------------------------------------

	def creative_expert(self, prompt: str, **kwargs: Any) -> str:
	    """Send *prompt* to the ``creative_expert`` and return its reply."""
	    dispatch = self._call_expert
	    return dispatch("creative_expert", prompt, **kwargs)

	def code_expert(self, prompt: str, **kwargs: Any) -> str:
	    """Send *prompt* to the ``code_expert`` and return its reply."""
	    dispatch = self._call_expert
	    return dispatch("code_expert", prompt, **kwargs)

	def logic_expert(self, prompt: str, mode: str = "deep_then_answer", **kwargs: Any) -> str:
	    """Send *prompt* to the ``logic_expert``, forwarding ``mode`` as a keyword."""
	    dispatch = self._call_expert
	    return dispatch("logic_expert", prompt, mode=mode, **kwargs)

	def role_expert(self, prompt: str, **kwargs: Any) -> str:
	    """Send *prompt* to the ``role_expert`` and return its reply."""
	    dispatch = self._call_expert
	    return dispatch("role_expert", prompt, **kwargs)

	def affect_expert(self, text: str, **kwargs: Any) -> str:
	    """Send *text* to the ``affect_expert`` and return its reply."""
	    dispatch = self._call_expert
	    return dispatch("affect_expert", text, **kwargs)

	def head_expert(self, prompt: str, image: Optional[str] = None, **kwargs: Any) -> str:
	    """Send *prompt* to the ``head_expert``, forwarding ``image`` as a keyword."""
	    dispatch = self._call_expert
	    return dispatch("head_expert", prompt, image=image, **kwargs)

	def vision_expert(self, prompt: str, image: Optional[str] = None, **kwargs: Any) -> str:
	    """Send *prompt* to the ``vision_expert``, forwarding ``image`` as a keyword."""
	    dispatch = self._call_expert
	    return dispatch("vision_expert", prompt, image=image, **kwargs)

	def math_expert(self, prompt: str, **kwargs: Any) -> str:
	    """Send *prompt* to the ``math_expert`` and return its reply."""
	    dispatch = self._call_expert
	    return dispatch("math_expert", prompt, **kwargs)

	def tool_expert(self, query: str, tools: Optional[List[Dict[str, Any]]] = None, **kwargs: Any) -> str:
	    """Send *query* to the ``tool_expert``, forwarding ``tools`` as a keyword."""
	    dispatch = self._call_expert
	    return dispatch("tool_expert", query, tools=tools, **kwargs)

	def translation_expert(self, text: str, **kwargs: Any) -> str:
	    """Translate *text* via the ``translation_expert`` when ``CHINESE_RE`` matches it.

	    Text without a ``CHINESE_RE`` match is returned unchanged. Extra
	    keyword arguments are accepted but not forwarded.
	    """
	    del kwargs  # accepted for call-site compatibility only
	    if CHINESE_RE.search(text or ""):
	        return self._call_expert("translation_expert", text)
	    return text

	def action_expert(self, prompt: str, **kwargs: Any) -> str:
	    """Run the action pipeline for *prompt* and return its ``output`` as text.

	    ``tools`` and ``goal`` are taken from ``kwargs`` (``goal`` defaults to
	    the prompt); all remaining keyword arguments travel on the route step.
	    """
	    reserved = {"tools", "goal"}
	    passthrough = {}
	    for option, value in kwargs.items():
	        if option not in reserved:
	            passthrough[option] = value

	    context = ExecutionContext(prompt=prompt, tools=kwargs.get("tools"))
	    route = RouteStep(
	        expert="action_expert",
	        sub_prompt=prompt,
	        goal=kwargs.get("goal", prompt),
	        kwargs=passthrough,
	    )
	    outcome = self._run_action_pipeline(context, route)
	    return str(outcome.get("output", ""))

	def web_expert(self, prompt: str, **kwargs: Any) -> str:
	    """Run the web pipeline for *prompt* and return its ``output`` as text.

	    ``tools`` and ``goal`` are taken from ``kwargs`` (``goal`` defaults to
	    the prompt); all remaining keyword arguments travel on the route step.
	    """
	    reserved = {"tools", "goal"}
	    passthrough = {}
	    for option, value in kwargs.items():
	        if option not in reserved:
	            passthrough[option] = value

	    context = ExecutionContext(prompt=prompt, tools=kwargs.get("tools"))
	    route = RouteStep(
	        expert="web_expert",
	        sub_prompt=prompt,
	        goal=kwargs.get("goal", prompt),
	        kwargs=passthrough,
	    )
	    outcome = self._run_web_pipeline(context, route)
	    return str(outcome.get("output", ""))

	def _get_expert(self, key: str) -> Optional[nn.Module]:
	# Try exact key first, then normalised variant.
	try:
	return self[key]
	except Exception:
	pass
	norm = _normalise_expert_name(key)
	if norm != key:
	try:
	return self[norm]
	except Exception:
	pass
	return None

	def _review_and_finalize(self, ctx: ExecutionContext) -> None:
	self._review_final_response(ctx)
	self._write_memory(ctx)
	self._update_user_profile(ctx)
	self._update_bot_profile(ctx)

	# ------------------------------------------------------------------
	# Summary / expert management
	# ------------------------------------------------------------------

	def summary(self) -> Dict[str, Any]:
	    """Return a status report for this model.

	    Expert names come from the PackedLM runtime summary when a runtime is
	    attached (falling back to the recorded runtime names, then to the
	    registered keys); otherwise from the registered keys, then the recorded
	    runtime names. The two pipeline experts count as available when a
	    CodeBox (``action_expert``) or a web module (``web_expert``) is present.
	    """
	    if self._packedlm_runtime is None:
	        names = list(self.keys()) or self._runtime_expert_names
	    else:
	        reported = _safe_call(self._packedlm_runtime, "summary", default={}) or {}
	        names = reported.get("experts") or self._runtime_expert_names or list(self.keys())

	    loaded = _expert_names_canonical(names)
	    missing_model = [name for name in self.MODEL_EXPERTS if name not in loaded]

	    pipeline_status = {
	        "action_expert": self._get_codebox() is not None,
	        "web_expert": self._web is not None,
	    }
	    ready_pipelines: List[str] = []
	    missing_pipeline: List[str] = []
	    for name, available in pipeline_status.items():
	        (ready_pipelines if available else missing_pipeline).append(name)

	    report: Dict[str, Any] = {}
	    report["model_class"] = "PackedLLM"
	    report["bot_id"] = self.bot_id
	    report["user_id"] = self.user_id
	    report["experts"] = loaded + ready_pipelines
	    report["missing_experts"] = missing_model + missing_pipeline
	    report["pipeline_status"] = pipeline_status
	    report["memory_mounted"] = self._memory_bank is not None
	    report["web_mounted"] = self._web is not None
	    report["hardware"] = self._hardware_state
	    report["packedlm_runtime_loaded"] = self._packedlm_runtime is not None
	    report["packedlm_checkpoint"] = self.packedlm_checkpoint
	    return report

	def reload_expert(self, expert_name: str) -> Any:
	    """Reload one expert and return it.

	    With a PackedLM runtime attached the call is delegated to it. Otherwise
	    the registered expert's own ``reload()`` is invoked.

	    Raises:
	        KeyError: the expert is not loaded or has no ``reload`` method.
	    """
	    runtime = self._packedlm_runtime
	    if runtime is not None:
	        return runtime.reload_expert(expert_name)

	    module = self._get_expert(expert_name)
	    if module is None or not hasattr(module, "reload"):
	        raise KeyError(f"Expert '{expert_name}' not loaded or has no reload() method.")
	    module.reload()
	    return module

	def unload_expert(self, expert_name: str) -> None:
	    """Unload one expert and drop it from the registry.

	    With a PackedLM runtime attached the call is delegated to it. Otherwise
	    the expert's ``unload()`` (if any) is called and the entry is deleted
	    under its normalised name, or under the given name if that fails.
	    Errors from unloading or deleting are ignored.
	    """
	    runtime = self._packedlm_runtime
	    if runtime is not None:
	        runtime.unload_expert(expert_name)
	        return

	    module = self._get_expert(expert_name)
	    if module is None:
	        return

	    if hasattr(module, "unload"):
	        with contextlib.suppress(Exception):
	            module.unload()

	    # Try the canonical key first, then the caller's spelling.
	    for candidate in (_normalise_expert_name(expert_name), expert_name):
	        try:
	            del self[candidate]
	        except Exception:
	            continue
	        break

	def unload_all(self) -> None:
	    """Unload every expert, via the PackedLM runtime when one is attached."""
	    runtime = self._packedlm_runtime
	    if runtime is None:
	        # Snapshot the keys: unloading removes entries while we iterate.
	        for name in list(self.keys()):
	            self.unload_expert(name)
	        return
	    runtime.unload_all()

	# ------------------------------------------------------------------
	# Source collection helpers (used by save_checkpoint)
	# ------------------------------------------------------------------

	def _collect_project_sources(self) -> Dict[str, str]:
	sources: Dict[str, str] = {}
	module_names = ["PackedLLM", "GATOR", "CompileWeb", "CodeBox", "PackedLM"]
	for name in module_names:
	mod = sys.modules.get(name)
	if mod is None:
	continue
	try:
	src = inspect.getsource(mod)
	except Exception:
	src = None
	if not src:
	file_path = getattr(mod, "__file__", None)
	if file_path and os.path.exists(file_path):
	try:
	with open(file_path, "r", encoding="utf-8") as f:
	src = f.read()
	except Exception:
	src = None
	if src:
	sources[name] = src
	return sources

	def _collect_vendor_sources(self) -> Dict[str, Dict[str, str]]:
	"""
	Collect pure-Python third-party modules that are already loaded.
	Conservative: only source-backed modules outside the project and stdlib.
	"""
	sources: Dict[str, Dict[str, str]] = {}
	project_root = Path(__file__).resolve().parent
	stdlib_root = Path(sys.base_prefix).resolve()

	for name, mod in list(sys.modules.items()):
	if mod is None:
	continue
	if name in {"PackedLLM", "GATOR", "CompileWeb", "CodeBox", "PackedLM"}:
	continue
	file_path = getattr(mod, "__file__", None)
	if not file_path:
	continue
	try:
	p = Path(file_path).resolve()
	except Exception:
	continue
	if not p.exists() or p.suffix.lower() != ".py":
	continue
	try:
	src = inspect.getsource(mod)
	except Exception:
	try:
	src = p.read_text(encoding="utf-8")
	except Exception:
	continue
	try:
	if str(p).startswith(str(project_root)):
	continue
	if str(p).startswith(str(stdlib_root)) and "site-packages" not in str(p):
	continue
	except Exception:
	pass
	sources[name] = {"file": str(p), "source": src}
	return sources

	# ------------------------------------------------------------------
	# Checkpoint: save
	# ------------------------------------------------------------------

	def save_checkpoint(self, path: Union[str, Path] = "LM.pt") -> None:
	    """
	    Save a self-contained checkpoint as a zip container.

	    Existing checkpoint files (PackedLM weights, memory bank, web index,
	    CodeBox) are streamed into the archive in reads of ``_CHUNK_BYTES``.
	    Project and vendor source code is embedded as plain text inside the
	    manifest.

	    The archive is written to ``<path stem>.tmp_save`` next to the target
	    and then moved over ``path`` with ``os.replace``, so the destination is
	    swapped in a single step and an existing checkpoint survives a failed
	    save. Temporary files are removed in every case.

	    Layout:
	        manifest.pt     – metadata, profiles, expert names, source maps
	        lm_chunk_N.bin  – PackedLM weight file slices (N = 0, 1, …)
	        mem_chunk_N.bin – MemoryBank checkpoint slices
	        web_chunk_N.bin – Web index slices
	        box_chunk_N.bin – CodeBox .pt slices

	    Args:
	        path: Destination file of the checkpoint.
	    """
	    path = Path(path)
	    tmp_path = path.with_suffix(".tmp_save")
	    tmp_box_path = str(tmp_path) + ".codebox.pt"

	    def _write_chunks_to_zip(zf: zipfile.ZipFile, file_path: Optional[str], prefix: str) -> int:
	        """Stream a file into the zip as ``{prefix}{N}.bin`` entries; return the entry count."""
	        if not file_path or not os.path.exists(file_path):
	            return 0
	        count = 0
	        with open(file_path, "rb") as fh:
	            while True:
	                chunk = fh.read(_CHUNK_BYTES)
	                if not chunk:
	                    break
	                zf.writestr(f"{prefix}{count}.bin", chunk)
	                count += 1
	        return count

	    # Everything that can create a temp file runs inside the try so the
	    # finally-block cleanup also covers failures while building the manifest.
	    try:
	        memory_checkpoint_path: Optional[str] = None
	        if self._memory_bank is not None:
	            cp = getattr(self._memory_bank, "checkpoint_path", None)
	            if cp is not None:
	                memory_checkpoint_path = str(cp)

	        codebox_checkpoint_path: Optional[str] = None
	        if self._codebox is not None:
	            cp = getattr(self._codebox, "_model_path", None)
	            if cp is not None and os.path.exists(str(cp)):
	                codebox_checkpoint_path = str(cp)
	            else:
	                # No saved CodeBox file: pickle the live object to a temp file.
	                bd = getattr(self._codebox, "base_dir", None)
	                if bd and os.path.isdir(str(bd)):
	                    try:
	                        torch.save(self._codebox, tmp_box_path, pickle_protocol=5)
	                        codebox_checkpoint_path = tmp_box_path
	                    except Exception:
	                        codebox_checkpoint_path = None

	        web_checkpoint_path: Optional[str] = None
	        if self._web is not None:
	            wp = getattr(self._web, "web_path", None)
	            if wp and os.path.exists(str(wp)):
	                web_checkpoint_path = str(wp)

	        manifest = {
	            "format_version": self._CHECKPOINT_FORMAT_VERSION,
	            "class_name": self.__class__.__name__,
	            "init_kwargs": {
	                "bot_id": self.bot_id,
	                "user_id": self.user_id,
	                "model_dir": self.model_dir,
	                "memory_dir": self.memory_dir,
	                "web": self._web is not None,
	                "hardware_probe": self._hardware_probe_enabled,
	                "packedlm_module": self.packedlm_module,
	            },
	            "state_dict": self.state_dict(),
	            "bot_profile": self._bot_profile,
	            "user_profile": self._user_profile,
	            "hardware_state": self._hardware_state,
	            "runtime_expert_names": self._runtime_expert_names,
	            "project_sources": self._collect_project_sources(),
	            "vendor_sources": self._collect_vendor_sources(),
	            "lm_chunk_count": 0,
	            "mem_chunk_count": 0,
	            "web_chunk_count": 0,
	            "box_chunk_count": 0,
	        }

	        with zipfile.ZipFile(str(tmp_path), "w", compression=zipfile.ZIP_STORED, allowZip64=True) as zf:
	            # Binary blobs first; their chunk counts go into the manifest.
	            manifest["lm_chunk_count"] = _write_chunks_to_zip(zf, self.packedlm_checkpoint, "lm_chunk_")
	            manifest["mem_chunk_count"] = _write_chunks_to_zip(zf, memory_checkpoint_path, "mem_chunk_")
	            manifest["web_chunk_count"] = _write_chunks_to_zip(zf, web_checkpoint_path, "web_chunk_")
	            manifest["box_chunk_count"] = _write_chunks_to_zip(zf, codebox_checkpoint_path, "box_chunk_")

	            # Manifest last, once all chunk counts are known.
	            buf = io.BytesIO()
	            torch.save(manifest, buf, pickle_protocol=5)
	            zf.writestr("manifest.pt", buf.getvalue())

	        # os.replace overwrites the destination in one step; the previous
	        # unlink-then-rename left a window with no checkpoint on disk.
	        os.replace(str(tmp_path), str(path))
	    finally:
	        for leftover in (str(tmp_path), tmp_box_path):
	            if os.path.exists(leftover):
	                try:
	                    os.unlink(leftover)
	                except OSError:
	                    pass

	# ------------------------------------------------------------------
	# Checkpoint: load
	# ------------------------------------------------------------------

	@classmethod
	def load_checkpoint(cls, path: Union[str, Path] = "LM.pt", map_location: str = "cpu") -> "PackedLLM":
	    """
	    Load a checkpoint produced by save_checkpoint.

	    Handles both the zip-container format (v3) and the legacy flat
	    torch.save format (v1/v2).

	    A v3 container is recognised by its top-level ``manifest.pt`` entry.
	    Checking ``zipfile.is_zipfile`` alone is not enough: ``torch.save``
	    itself writes zip archives, so legacy checkpoints would otherwise be
	    sent to the v3 loader and fail on the missing manifest.

	    Raises:
	        TypeError: the legacy payload is neither an instance of this class
	            nor a dict.
	    """
	    path = Path(path)

	    is_v3_container = False
	    if zipfile.is_zipfile(str(path)):
	        with zipfile.ZipFile(str(path), "r") as zf:
	            is_v3_container = "manifest.pt" in zf.namelist()
	    if is_v3_container:
	        return cls._load_checkpoint_v3(path, map_location)

	    # Legacy flat format.
	    # NOTE(security): weights_only=False unpickles arbitrary objects; only
	    # load checkpoints that come from a trusted source.
	    payload = torch.load(str(path), map_location=map_location, weights_only=False)
	    if isinstance(payload, cls):
	        return payload
	    if not isinstance(payload, dict):
	        raise TypeError(f"Unsupported checkpoint payload: {type(payload).__name__}")
	    return cls._load_from_flat_payload(payload, map_location)

	@classmethod
	def _load_checkpoint_v3(cls, path: Path, map_location: str) -> "PackedLLM":
	"""Load a v3 zip-container checkpoint, streaming binary blobs straight to disk."""
	with zipfile.ZipFile(str(path), "r") as zf:
	manifest_bytes = zf.read("manifest.pt")
	buf = io.BytesIO(manifest_bytes)
	manifest = torch.load(buf, map_location=map_location, weights_only=False)

	def _stream_to_temp(prefix: str, count: int, suffix: str, file_prefix: str) -> Optional[str]:
	if count == 0:
	return None
	fd, out_path = tempfile.mkstemp(prefix=file_prefix, suffix=suffix)
	os.close(fd)
	try:
	with open(out_path, "wb") as out_fh:
	for i in range(count):
	with zf.open(f"{prefix}{i}.bin") as entry:
	while True:
	chunk = entry.read(_CHUNK_BYTES)
	if not chunk:
	break
	out_fh.write(chunk)
	except Exception:
	try:
	os.unlink(out_path)
	except Exception:
	pass
	return None
	return out_path

	temp_lm_path = _stream_to_temp("lm_chunk_", manifest.get("lm_chunk_count", 0), ".pt", "packedlm_")
	temp_web_path = _stream_to_temp("web_chunk_", manifest.get("web_chunk_count", 0), ".pt", "websearch_")
	temp_box_path = _stream_to_temp("box_chunk_", manifest.get("box_chunk_count", 0), ".pt", "codebox_")

	temp_mem_root: Optional[str] = None
	mem_count = manifest.get("mem_chunk_count", 0)
	if mem_count:
	temp_mem_root = tempfile.mkdtemp(prefix="packedllm_memory_")
	try:
	with open(os.path.join(temp_mem_root, "GATOR.pt"), "wb") as out_fh:
	for i in range(mem_count):
	with zf.open(f"mem_chunk_{i}.bin") as entry:
	while True:
	chunk = entry.read(_CHUNK_BYTES)
	if not chunk:
	break
	out_fh.write(chunk)
	except Exception:
	temp_mem_root = None

	return cls._reconstruct_from_manifest(
	manifest, map_location,
	lm_path=temp_lm_path,
	mem_root=temp_mem_root,
	web_path=temp_web_path,
	box_path=temp_box_path,
	)

	@classmethod
	def _load_from_flat_payload(cls, payload: Dict[str, Any], map_location: str) -> "PackedLLM":
	    """Load a legacy v1/v2 flat torch.save payload.

	    The embedded PackedLM and web blobs are written to temp files through
	    ``_write_chunks_to_temp``; the memory blob is written to ``GATOR.pt``
	    in a fresh temp directory. The payload itself then serves as the
	    manifest for ``_reconstruct_from_manifest`` (no CodeBox is restored).
	    """

	    def _blob(key: str) -> Optional[bytes]:
	        """Return the payload entry as bytes; chunk lists are joined, anything else is None."""
	        raw = payload.get(key)
	        if isinstance(raw, (bytes, bytearray)):
	            return bytes(raw)
	        if isinstance(raw, list):
	            return _chunks_to_bytes(raw)
	        return None

	    lm_blob = _blob("packedlm_checkpoint_bytes")
	    lm_path = _write_chunks_to_temp(
	        [lm_blob] if lm_blob else None, suffix=".pt", prefix="packedlm_"
	    )

	    web_blob = _blob("web_checkpoint_bytes")
	    web_path = _write_chunks_to_temp(
	        [web_blob] if web_blob else None, suffix=".pt", prefix="websearch_"
	    )

	    mem_root: Optional[str] = None
	    mem_blob = _blob("memory_checkpoint_bytes")
	    if mem_blob is not None:
	        mem_root = tempfile.mkdtemp(prefix="packedllm_memory_")
	        gator_file = os.path.join(mem_root, "GATOR.pt")
	        try:
	            with open(gator_file, "wb") as sink:
	                sink.write(mem_blob)
	        except Exception:
	            mem_root = None

	    restored_paths = {
	        "lm_path": lm_path,
	        "mem_root": mem_root,
	        "web_path": web_path,
	        "box_path": None,
	    }
	    return cls._reconstruct_from_manifest(payload, map_location, **restored_paths)

	@classmethod
	def _reconstruct_from_manifest(
	    cls,
	    manifest: Dict[str, Any],
	    map_location: str,
	    lm_path: Optional[str] = None,
	    mem_root: Optional[str] = None,
	    web_path: Optional[str] = None,
	    box_path: Optional[str] = None,
	) -> "PackedLLM":
	    """
	    Shared reconstruction logic. Takes paths to already-materialized blobs
	    (written by the caller) rather than in-memory bytes.

	    Steps: write the embedded project/vendor sources into a fresh temp
	    directory and put it first on ``sys.path``; build the instance from the
	    stored ``init_kwargs`` (pointing it at ``lm_path`` / ``mem_root`` and
	    with ``web`` disabled); load the state dict non-strictly; restore the
	    profiles, hardware state and runtime expert names; finally re-attach
	    the web index and the CodeBox when their files are given.

	    Every restore step is best-effort: a failure is reported with a
	    ``[PackedLLM]`` message and the remaining steps still run.
	    """
	    init_kwargs = dict(manifest.get("init_kwargs", {}))

	    temp_vendor_dir = tempfile.mkdtemp(prefix="packedllm_vendor_")

	    def _write_module_source(module_name: str, src: str, file_hint: Optional[str] = None) -> None:
	        """Write *src* to the file that ``import module_name`` would resolve to."""
	        parts = module_name.split(".")
	        # The name comes from the checkpoint and becomes a file path; only
	        # dotted identifiers are accepted so the target cannot leave the
	        # vendor directory (e.g. via empty or ``..``-like components).
	        if not all(part.isidentifier() for part in parts):
	            raise ValueError(f"unsafe module name {module_name!r}")
	        module_rel = os.path.join(*parts)
	        if file_hint and os.path.basename(file_hint) == "__init__.py":
	            target = os.path.join(temp_vendor_dir, module_rel, "__init__.py")
	        else:
	            target = os.path.join(temp_vendor_dir, f"{module_rel}.py")
	        os.makedirs(os.path.dirname(target), exist_ok=True)
	        with open(target, "w", encoding="utf-8") as f:
	            f.write(src)

	    for name, src in (manifest.get("project_sources") or {}).items():
	        try:
	            _write_module_source(name, src)
	        except Exception as exc:
	            print(f"[PackedLLM] could not restore project source {name!r} (non-fatal): {exc}")

	    for name, meta in (manifest.get("vendor_sources") or {}).items():
	        try:
	            if isinstance(meta, dict):
	                _write_module_source(name, meta.get("source", ""), meta.get("file"))
	            elif isinstance(meta, str):
	                _write_module_source(name, meta)
	        except Exception as exc:
	            print(f"[PackedLLM] could not restore vendor source {name!r} (non-fatal): {exc}")

	    if temp_vendor_dir not in sys.path:
	        sys.path.insert(0, temp_vendor_dir)

	    if lm_path:
	        init_kwargs["packedlm_checkpoint"] = lm_path
	    if mem_root:
	        init_kwargs["memory_dir"] = mem_root
	    # The web module is re-attached from web_path further down.
	    init_kwargs["web"] = False

	    obj = cls(**init_kwargs)

	    try:
	        sd = manifest.get("state_dict")
	        if sd:
	            obj.load_state_dict(sd, strict=False)
	    except Exception as exc:
	        print(f"[PackedLLM] state_dict restore failed (non-fatal): {exc}")

	    obj._bot_profile = manifest.get("bot_profile", {}) or {}
	    obj._user_profile = manifest.get("user_profile", {}) or {}
	    obj._hardware_state = manifest.get("hardware_state", {}) or {}
	    obj._runtime_expert_names = _expert_names_canonical(
	        manifest.get("runtime_expert_names", []) or []
	    )

	    if web_path and Web is not None:
	        try:
	            obj._web = Web(web_location=web_path, auto_create=False)
	        except Exception as exc:
	            print(f"[PackedLLM] web index restore failed (non-fatal): {exc}")
	            obj._web = None

	    if box_path and os.path.exists(box_path):
	        try:
	            # NOTE(security): weights_only=False unpickles arbitrary objects;
	            # only load checkpoints that come from a trusted source.
	            codebox_obj = torch.load(box_path, map_location=map_location, weights_only=False)
	            obj._codebox = codebox_obj
	        except Exception as exc:
	            print(f"[PackedLLM] CodeBox restore failed (non-fatal): {exc}")
	            obj._codebox = None

	    return obj

	# ------------------------------------------------------------------
	# Convenience aliases
	# ------------------------------------------------------------------

	def save(self, path: Union[str, Path] = "LM.pt") -> None:
	    """Alias for ``save_checkpoint``; the result of that call is discarded."""
	    write_checkpoint = self.save_checkpoint
	    write_checkpoint(path)

	# ------------------------------------------------------------------
	# Representation / containment
	# ------------------------------------------------------------------

	def __repr__(self) -> str:
	    """Describe the model: ids, loaded experts, missing required experts, runtime flag.

	    Expert names are the registered keys, or the recorded runtime expert
	    names when nothing is registered.
	    """
	    if len(self.keys()):
	        names = list(self.keys())
	    else:
	        names = self._runtime_expert_names
	    loaded = _expert_names_canonical(names)
	    absent = [name for name in self.REQUIRED_EXPERTS if name not in loaded]

	    fields = [
	        f"bot_id={self.bot_id!r}",
	        f"user_id={self.user_id!r}",
	        f"experts=[{', '.join(loaded)}]",
	    ]
	    if absent:
	        fields.append(f"missing=[{', '.join(absent)}]")
	    if self._packedlm_runtime is not None:
	        fields.append("packedlm_runtime=True")
	    return f"PackedLLM({', '.join(fields)})"

	def __contains__(self, key: object) -> bool:  # type: ignore[override]
	    """Report whether *key* names a registered or runtime expert.

	    Both the given key and its normalised form are checked, first against
	    the registered modules and then against the runtime expert names. Any
	    error during the lookup counts as "not contained".
	    """
	    try:
	        canonical = _normalise_expert_name(str(key))
	        registered = self._modules
	        if key in registered or canonical in registered:
	            return True
	        runtime_names = self._runtime_expert_names
	        return key in runtime_names or canonical in runtime_names
	    except Exception:
	        return False


	# ---------------------------------------------------------------------------
	# Factory
	# ---------------------------------------------------------------------------

	def build_packedlm(
	    bot_id: Optional[str] = None,
	    user_id: Optional[str] = None,
	    model_dir: str = "models",
	    memory_dir: Optional[str] = None,
	    web: bool = False,
	    hardware_probe: bool = True,
	    expert_modules: Optional[Dict[str, nn.Module]] = None,
	    packedlm_checkpoint: Optional[str] = "LM.pt",
	    packedlm_module: Optional[str] = "PackedLM",
	) -> PackedLLM:
	    """Factory: construct a ``PackedLLM`` from the given options.

	    Every argument is forwarded unchanged, by keyword, to the ``PackedLLM``
	    constructor.
	    """
	    options = {
	        "bot_id": bot_id,
	        "user_id": user_id,
	        "model_dir": model_dir,
	        "memory_dir": memory_dir,
	        "web": web,
	        "hardware_probe": hardware_probe,
	        "expert_modules": expert_modules,
	        "packedlm_checkpoint": packedlm_checkpoint,
	        "packedlm_module": packedlm_module,
	    }
	    return PackedLLM(**options)

	'''
	# ---------------------------------------------------------------------------
	# __main__ integration test
	# ---------------------------------------------------------------------------

	if __name__ == "__main__":
	import traceback

	SEP = "=" * 64

	parser = argparse.ArgumentParser(description="PackedLLM integration test")
	parser.add_argument("--checkpoint", type=str, default="LM.pt")
	parser.add_argument("--image", type=str, default=None)
	parser.add_argument("--bot-id", type=str, default="pip")
	parser.add_argument("--user-id", type=str, default="test_user")
	parser.add_argument("--compile-only", action="store_true")
	parser.add_argument("--packedlm-module", type=str, default="PackedLM")
	parser.add_argument("--save-path", type=str, default="PackedLLM.pt")
	args = parser.parse_args()

	print(SEP)
	print(" PackedLLM — Expert integration test")
	print(SEP)

	def fail(msg: str, exc: Optional[BaseException] = None):
	print(f"FAILED: {msg}")
	if exc is not None:
	traceback.print_exc()
	raise SystemExit(1)

	try:
	lm = PackedLLM(
	bot_id=args.bot_id,
	user_id=args.user_id,
	model_dir="models",
	memory_dir=None,
	web=True,
	hardware_probe=True,
	expert_modules=None,
	packedlm_checkpoint=args.checkpoint,
	packedlm_module=args.packedlm_module,
	)
	print(f"Loaded: {lm}")
	except Exception as exc:
	fail("could not initialize PackedLLM", exc)

	print("\n[1] summary()")
	try:
	s = lm.summary()
	print(f"experts loaded : {s['experts']}")
	print(f"missing experts : {s['missing_experts']}")
	print(f"memory mounted : {s['memory_mounted']}")
	print(f"web mounted : {s['web_mounted']}")
	print(f"runtime loaded : {s['packedlm_runtime_loaded']}")
	if not s["memory_mounted"]:
	fail("Memory bank did not mount")
	if not s["web_mounted"]:
	fail("Web module did not mount")
	if not s["packedlm_runtime_loaded"]:
	fail(f"PackedLM runtime was not loaded from {args.checkpoint}")
	if s["missing_experts"]:
	fail(f"Missing experts after load: {s['missing_experts']}")
	print("OK")
	except SystemExit:
	raise
	except Exception as exc:
	fail("summary()", exc)

	if not args.compile_only:
	print("\n[2] direct expert smoke tests")
	tests = [
	("creative_expert", lambda: lm.creative_expert("Write a one-sentence haiku about rivers.")),
	("code_expert", lambda: lm.code_expert("Write a Python one-liner to reverse a string.")),
	("logic_expert", lambda: lm.logic_expert("Is 97 a prime number? Explain briefly.", mode="deep_then_answer")),
	("math_expert", lambda: lm.math_expert("What is 17 * 23?")),
	("translation_expert", lambda: lm.translation_expert("把这句中文翻译成英文：今天天气很好。")),
	("affect_expert", lambda: lm.affect_expert("I'm extremely frustrated and need a calm response.")),
	("role_expert", lambda: lm.role_expert("Greet the user warmly.", character_card="You are a cheerful assistant named Pip.")),
	("head_expert", lambda: lm.head_expert("Plan a two-step response to: What is the capital of France?")),
	("tool_expert", lambda: lm.tool_expert(
	"Get the weather for ZIP 90210.",
	tools=[{
	"name": "get_weather",
	"description": "Fetch weather by ZIP.",
	"parameters": {
	"type": "object",
	"properties": {"zip": {"type": "string"}},
	"required": ["zip"],
	},
	}],
	)),
	("web_expert", lambda: lm.web_expert("What is the current prime minister of the UK?")),
	("action_expert", lambda: lm.action_expert(
	"Create request_report.txt containing exactly: Action pipeline OK"
	)),
	]

	if args.image and os.path.exists(args.image):
	tests.append(("vision_expert", lambda: lm.vision_expert("Describe what you see.", image=args.image)))
	else:
	print("Vision smoke test skipped (pass --image /path/to/image).")

	for name, fn in tests:
	try:
	t0 = time.perf_counter()
	result = fn()
	elapsed = (time.perf_counter() - t0) * 1000
	preview = str(result)[:180].replace("\n", " ")
	print(f" ✓ {name:<22} [{elapsed:>7.1f}ms] {preview}")
	if not result or not str(result).strip():
	fail(f"{name} returned empty output")
	except SystemExit:
	raise
	except Exception as exc:
	fail(name, exc)

	print("\n[3] pipeline forward() smoke tests")
	forward_prompts = [
	"Write a Python function to calculate compound interest.",
	"把这句中文翻译成英文：今天的天气很好。",
	"Explain the main idea of Newton's second law in one paragraph.",
	"Search the web and summarize the latest open source AI model releases.",
	"Create a small action report file saying the pipeline is working.",
	]

	for prompt in forward_prompts:
	try:
	t0 = time.perf_counter()
	response = lm.forward(prompt, image=args.image if args.image and os.path.exists(args.image) else None)
	elapsed = (time.perf_counter() - t0) * 1000
	preview = response[:200].replace("\n", " ")
	print(f" ✓ [{elapsed:>6.0f}ms] {prompt[:55]}...")
	print(f" → {preview}")
	if not response or not str(response).strip():
	fail(f"forward() returned empty response for: {prompt}")
	except SystemExit:
	raise
	except Exception as exc:
	fail(f"forward() for prompt: {prompt}", exc)

	print("\n[4] checkpoint round-trip")
	temp_ckpt = os.path.join(tempfile.gettempdir(), f"PackedLLM_roundtrip_{int(time.time())}.pt")
	try:
	print(f" Saving to {temp_ckpt} ...")
	lm.save_checkpoint(temp_ckpt)

	print(" Unloading original model before reload (avoids two full models resident at once)...")
	try:
	lm.unload_all()
	except Exception:
	pass
	lm._packedlm_runtime = None
	import gc

	gc.collect()
	if torch.cuda.is_available():
	torch.cuda.empty_cache()
	time.sleep(1) # give the OS/driver a moment to actually reclaim memory

	print(f" Loading back ...")
	lm2 = PackedLLM.load_checkpoint(temp_ckpt)
	if not isinstance(lm2, PackedLLM):
	fail("Loaded object is not PackedLLM")
	s2 = lm2.summary()
	if s2["missing_experts"]:
	fail(f"Reloaded model missing experts: {s2['missing_experts']}")
	print(f" ✓ saved to {temp_ckpt} and reloaded successfully")
	print(f" ✓ reloaded repr: {lm2}")
	# Quick sanity – run one forward on the reloaded model
	r = lm2.head_expert("Say hello.")
	if not r or not r.strip():
	fail("Reloaded model returned empty head_expert output")
	print(f" ✓ reloaded head_expert sanity: {r[:80]}")
	except SystemExit:
	raise
	except Exception as exc:
	fail("checkpoint round-trip", exc)
	finally:
	if os.path.exists(temp_ckpt):
	try:
	os.unlink(temp_ckpt)
	except Exception:
	pass

	print("\n[5] final save")
	try:
	lm.save_checkpoint(args.save_path)
	sz_mb = os.path.getsize(args.save_path) / 1e6
	print(f" ✓ final checkpoint saved to {args.save_path} ({sz_mb:.1f} MB)")
	except SystemExit:
	raise
	except Exception as exc:
	fail("final save", exc)

	print(f"\n{SEP}")
	print(" Test complete.")
	print(f"{SEP}\n")

	'''

	"""
	PackedLLMRunner.py

	A thin, ergonomic wrapper around a saved PackedLLM checkpoint (e.g. PackedLLM.pt).

	Loading a PackedLLM checkpoint directly with PackedLLM.load_checkpoint() gets you
	the model, but every expert is lazily warmed on first real use, and the only way
	to use the embedded modules (MemoryBank/GATOR, Web, CodeBox) is to route a prompt
	through the full plan -> route -> execute -> persona -> review pipeline. This class
	sits on top of that and gives you:

	- one-call loading + optional warmup (so first-request latency is paid at
	startup instead of in front of a real user)
	- the full orchestrated pipeline via .chat(...)
	- every one of the 12 experts directly, bypassing the planner/router entirely
	- direct access to MemoryBank (store/recall facts, read/write profiles),
	Web (raw search, no multi-round research pipeline), and CodeBox (run
	arbitrary code, no plan/generate/review wrapped around it)
	- status reporting, expert reload/unload, and re-saving

	Usage:

	from PackedLLMRunner import PackedLLMRunner

	bot = PackedLLMRunner("PackedLLM.pt", bot_id="pip", user_id="alice")

	print(bot.chat("What's a clever way to sort a list in Python?")) # full pipeline
	print(bot.code("Write a function that reverses a string.")) # expert directly
	print(bot.memory_recall("alice's favorite color")) # embedded module directly
	print(bot.run_code("print(2 + 2)")) # CodeBox directly

	bot.unload_all()
	"""

	class PackedLLMRunner:
	    """
	    Load a saved PackedLLM checkpoint and expose it through one interface.

	    The wrapper forwards to the loaded model for the orchestrated pipeline
	    (``chat``), for each individual expert (``code``, ``math``, ...), and for
	    the modules hanging off the model (memory bank, web module, CodeBox).
	    It can be used as a context manager; leaving the ``with`` block calls
	    ``unload_all()``.
	    """

	    def __init__(
	        self,
	        checkpoint_path: Union[str, Path] = "PackedLLM.pt",
	        map_location: str = "cpu",
	        bot_id: Optional[str] = None,
	        user_id: Optional[str] = None,
	        warmup: bool = False,
	        warmup_web: bool = False,
	        warmup_vision: bool = False,
	        warmup_action: bool = False,
	        verbose: bool = True,
	    ):
	        """
	        Sweep stale temp artifacts, load the checkpoint, and optionally warm up.

	        Args:
	            checkpoint_path: File passed to ``PackedLLM.load_checkpoint``.
	            map_location: Forwarded to ``PackedLLM.load_checkpoint``.
	            bot_id: If truthy, assigned to ``model.bot_id`` after loading.
	            user_id: If truthy, assigned to ``model.user_id`` after loading.
	            warmup: Run ``warmup()`` right after loading.
	            warmup_web: Forwarded to ``warmup(include_web=...)``.
	            warmup_vision: Forwarded to ``warmup(include_vision=...)``.
	            warmup_action: Forwarded to ``warmup(include_action=...)``.
	            verbose: When true, progress messages are printed via ``_log``.
	        """
	        self.checkpoint_path = str(checkpoint_path)
	        self.verbose = verbose

	        # The sweep removes every *.pt / *.gguf under the temp directory, so
	        # the checkpoint we are about to load must be shielded in case it
	        # lives there as well.
	        self.cleanup_report = self._cleanup_temp_files(protect=[self.checkpoint_path])
	        swept = self.cleanup_report
	        if swept["removed_files"] or swept["removed_dirs"] or swept["errors"]:
	            self._log(
	                f"Temp cleanup: removed {swept['removed_files']} file(s), "
	                f"{swept['removed_dirs']} dir(s), freed {swept['freed_gb']} GB, "
	                f"{swept['errors']} error(s)"
	            )

	        self._log(f"Loading checkpoint from {self.checkpoint_path} ...")

	        started = time.perf_counter()
	        self.model: PackedLLM = PackedLLM.load_checkpoint(
	            self.checkpoint_path, map_location=map_location
	        )
	        self._load_seconds = time.perf_counter() - started
	        self._log(f"Loaded in {self._load_seconds:.1f}s")

	        if bot_id:
	            self.model.bot_id = bot_id
	        if user_id:
	            self.model.user_id = user_id

	        self.warmup_report: Dict[str, Any] = {}
	        if warmup:
	            self.warmup_report = self.warmup(
	                include_web=warmup_web,
	                include_vision=warmup_vision,
	                include_action=warmup_action,
	            )

	    @staticmethod
	    def _cleanup_temp_files(
	        temp_root: Optional[Union[str, Path]] = None,
	        protect: Iterable[Union[str, Path]] = (),
	    ) -> dict:
	        """
	        Best-effort removal of leftover model files and scratch directories.

	        WARNING: the file patterns are broad (``*.gguf``, ``*.pt``) and the
	        search is recursive, so files in the temp directory that belong to
	        other programs are removed too. Use ``protect`` to shield anything
	        that must survive.

	        Args:
	            temp_root: Directory to sweep. Defaults to ``tempfile.gettempdir()``.
	            protect: Paths that must not be deleted. A protected file is
	                skipped, and so is any matching directory that is, or
	                contains, a protected path.

	        Returns:
	            A dict with ``removed_files``, ``removed_dirs``, ``freed_gb``,
	            ``errors`` (count) and ``error_details`` (first 20 messages).
	        """
	        root = Path(temp_root) if temp_root is not None else Path(tempfile.gettempdir())

	        file_patterns = ("*.gguf", "*.pt")
	        dir_patterns = ("packedlm_*", "packedllm_*", "codebox_*", "websearch_*")

	        protected = set()
	        for entry in protect:
	            try:
	                protected.add(Path(entry).resolve())
	            except (OSError, RuntimeError):
	                continue

	        errors: List[str] = []

	        def _is_protected(candidate: Path) -> bool:
	            if not protected:
	                return False
	            try:
	                resolved = candidate.resolve()
	            except (OSError, RuntimeError):
	                # Cannot tell what this really points at: err on the safe side.
	                return True
	            return any(resolved == kept or resolved in kept.parents for kept in protected)

	        def _snapshot(pattern: str) -> List[Path]:
	            # Materialize the matches up front so that deleting entries does
	            # not disturb a directory walk that is still in progress.
	            try:
	                return list(root.rglob(pattern))
	            except OSError as exc:
	                errors.append(f"{root}/{pattern}: {exc}")
	                return []

	        removed_files = 0
	        removed_dirs = 0
	        freed_bytes = 0

	        for pattern in file_patterns:
	            for path in _snapshot(pattern):
	                try:
	                    if not path.is_file() or _is_protected(path):
	                        continue
	                    size = path.stat().st_size
	                    path.unlink(missing_ok=True)
	                except Exception as exc:  # best-effort: record and move on
	                    errors.append(f"{path}: {exc}")
	                else:
	                    removed_files += 1
	                    freed_bytes += size

	        for pattern in dir_patterns:
	            for path in _snapshot(pattern):
	                try:
	                    # is_dir() is False for entries already removed together
	                    # with a parent directory, so those are skipped here.
	                    if not path.is_dir() or _is_protected(path):
	                        continue

	                    dir_size = 0
	                    for item in path.rglob("*"):
	                        try:
	                            if item.is_file():
	                                dir_size += item.stat().st_size
	                        except OSError:
	                            continue  # size accounting only

	                    shutil.rmtree(path, ignore_errors=False)
	                except Exception as exc:  # best-effort: record and move on
	                    errors.append(f"{path}: {exc}")
	                else:
	                    removed_dirs += 1
	                    freed_bytes += dir_size

	        return {
	            "removed_files": removed_files,
	            "removed_dirs": removed_dirs,
	            "freed_gb": round(freed_bytes / (1024 ** 3), 3),
	            "errors": len(errors),
	            "error_details": errors[:20],  # keep return size reasonable
	        }

	    def __enter__(self) -> "PackedLLMRunner":
	        return self

	    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
	        self.unload_all()

	    def _log(self, msg: str) -> None:
	        """Print ``msg`` with a ``[PackedLLMRunner]`` prefix when verbose."""
	        if self.verbose:
	            print(f"[PackedLLMRunner] {msg}")

	    def warmup(
	        self,
	        include_web: bool = False,
	        include_vision: bool = False,
	        include_action: bool = False,
	    ) -> Dict[str, Any]:
	        """
	        Call each expert once with a tiny prompt so first-use cost is paid now.

	        web_expert, vision_expert, and action_expert are skipped by default
	        since they involve network calls, require a real image, or have
	        actual side effects (running code, writing files). Pass the
	        matching include_* flag to warm those up too.

	        Returns:
	            Mapping of expert name to elapsed seconds (rounded to 2 places),
	            or to an ``"error: ..."`` string when that probe raised. The same
	            mapping is stored on ``self.warmup_report`` so ``status()``
	            reflects the most recent warm-up.
	        """
	        report: Dict[str, Any] = {}

	        probes = {
	            "head_expert": lambda: self.model.head_expert("Say OK."),
	            "creative_expert": lambda: self.model.creative_expert("Say OK in one short sentence."),
	            "code_expert": lambda: self.model.code_expert("Write a one-line Python comment."),
	            "logic_expert": lambda: self.model.logic_expert("Is 2 a prime number?", mode="deep_then_answer"),
	            "math_expert": lambda: self.model.math_expert("What is 1 + 1?"),
	            "affect_expert": lambda: self.model.affect_expert("I feel fine."),
	            "role_expert": lambda: self.model.role_expert(
	                "Say hi.", character_card="You are a helpful assistant."
	            ),
	            "translation_expert": lambda: self.model.translation_expert("你好"),
	            "tool_expert": lambda: self.model.tool_expert(
	                "Say hi.",
	                tools=[{
	                    "name": "noop",
	                    "description": "No-op warmup tool.",
	                    "parameters": {"type": "object", "properties": {}},
	                }],
	            ),
	        }

	        if include_vision:
	            probes["vision_expert"] = lambda: self.model.vision_expert(
	                "Warmup probe; no real image provided.", image=None
	            )
	        if include_web:
	            probes["web_expert"] = lambda: self.model.web_expert("What is today's date?")
	        if include_action:
	            probes["action_expert"] = lambda: self.model.action_expert(
	                "Create request_report.txt containing exactly: warmup ok"
	            )

	        for name, probe in probes.items():
	            started = time.perf_counter()
	            try:
	                probe()
	            except Exception as exc:
	                # One failing expert must not prevent the others from warming.
	                report[name] = f"error: {exc}"
	                self._log(f"warmup failed for {name}: {exc}")
	            else:
	                report[name] = round(time.perf_counter() - started, 2)
	                self._log(f"warmed {name} ({report[name]}s)")

	        self.warmup_report = report
	        return report

	    def chat(
	        self,
	        prompt: str,
	        image: Optional[str] = None,
	        tools: Optional[List[Dict[str, Any]]] = None,
	        stream: bool = False,
	        deep_think: Optional[bool] = False,
	        fast_think: Optional[bool] = False,
	    ):
	        """Run ``prompt`` through ``model.forward`` and return its result."""
	        return self.model.forward(
	            prompt,
	            image=image,
	            tools=tools,
	            stream=stream,
	            deep_think=deep_think,
	            fast_think=fast_think,
	        )

	    # ---- direct expert access: each method forwards to the matching
	    # ---- ``model.<name>_expert`` callable ----

	    def creative(self, prompt: str, **kwargs: Any) -> str:
	        return self.model.creative_expert(prompt, **kwargs)

	    def code(self, prompt: str, **kwargs: Any) -> str:
	        return self.model.code_expert(prompt, **kwargs)

	    def logic(self, prompt: str, mode: str = "deep_then_answer", **kwargs: Any) -> str:
	        return self.model.logic_expert(prompt, mode=mode, **kwargs)

	    def math(self, prompt: str, **kwargs: Any) -> str:
	        return self.model.math_expert(prompt, **kwargs)

	    def translate(self, text: str, **kwargs: Any) -> str:
	        return self.model.translation_expert(text, **kwargs)

	    def affect(self, text: str, **kwargs: Any) -> str:
	        return self.model.affect_expert(text, **kwargs)

	    def role(self, prompt: str, character_card: Optional[str] = None, **kwargs: Any) -> str:
	        # Only forward character_card when the caller actually supplied one.
	        if character_card is not None:
	            kwargs.setdefault("character_card", character_card)
	        return self.model.role_expert(prompt, **kwargs)

	    def head(self, prompt: str, image: Optional[str] = None, **kwargs: Any) -> str:
	        return self.model.head_expert(prompt, image=image, **kwargs)

	    def vision(self, prompt: str, image: str, **kwargs: Any) -> str:
	        return self.model.vision_expert(prompt, image=image, **kwargs)

	    def tool(self, query: str, tools: List[Dict[str, Any]], **kwargs: Any) -> str:
	        return self.model.tool_expert(query, tools=tools, **kwargs)

	    def web(self, prompt: str, **kwargs: Any) -> str:
	        return self.model.web_expert(prompt, **kwargs)

	    def action(self, prompt: str, **kwargs: Any) -> str:
	        return self.model.action_expert(prompt, **kwargs)

	    # ---- memory bank ----

	    @property
	    def memory(self) -> Any:
	        """The model's ``_memory_bank`` attribute (may be ``None``)."""
	        return self.model._memory_bank

	    def memory_store(self, text: str, tags: Optional[List[str]] = None, importance: float = 0.7) -> Any:
	        """
	        Store ``text`` in the memory bank.

	        Uses ``store()`` when the bank has it, otherwise ``store_knowledge()``.
	        ``tags`` and ``importance`` are only forwarded on the
	        ``store_knowledge()`` path.

	        Raises:
	            RuntimeError: If no memory bank is mounted or it offers neither
	                method.
	        """
	        mb = self.memory
	        if mb is None:
	            raise RuntimeError("Memory bank is not mounted on this model.")
	        if hasattr(mb, "store"):
	            return mb.store(
	                text=text,
	                metadata={
	                    "bot_id": self.model.bot_id,
	                    "user_id": self.model.user_id,
	                    "timestamp": time.time(),
	                },
	            )
	        if hasattr(mb, "store_knowledge"):
	            return mb.store_knowledge(
	                [text], tags=tags or ["manual"], source="PackedLLMRunner", importance=importance
	            )
	        raise RuntimeError("Memory bank has neither store() nor store_knowledge().")

	    def memory_recall(self, query: str, top_k: int = 5, **kwargs: Any) -> Any:
	        """
	        Query the memory bank through the first lookup method it provides.

	        ``recall``, ``search``, ``query`` and ``retrieve`` are tried in that
	        order. If the call raises ``TypeError`` it is retried with ``query``
	        as the only argument. Returns ``[]`` when there is no memory bank or
	        none of the methods exists.
	        """
	        mb = self.memory
	        if mb is None:
	            return []
	        for method_name in ("recall", "search", "query", "retrieve"):
	            fn = getattr(mb, method_name, None)
	            if callable(fn):
	                try:
	                    return fn(query, top_k=top_k, **kwargs)
	                except TypeError:
	                    return fn(query)
	        return []

	    def get_user_profile(self) -> Dict[str, Any]:
	        """Return a shallow copy of ``model._user_profile``."""
	        return dict(self.model._user_profile)

	    def get_bot_profile(self) -> Dict[str, Any]:
	        """Return a shallow copy of ``model._bot_profile``."""
	        return dict(self.model._bot_profile)

	    def _persist_profile(self, kind: str, owner_id: Optional[str], profile: Dict[str, Any]) -> None:
	        """Write ``profile`` via ``set_profile`` on the bank or its ``gator``, if available."""
	        mb = self.memory
	        if mb is None or not owner_id:
	            return
	        if hasattr(mb, "set_profile"):
	            mb.set_profile(kind, owner_id, profile)
	        elif hasattr(getattr(mb, "gator", None), "set_profile"):
	            mb.gator.set_profile(kind, owner_id, profile)

	    def set_user_profile(self, updates: Dict[str, Any]) -> None:
	        """Merge ``updates`` into the user profile and persist it when possible."""
	        self.model._user_profile.update(updates)
	        self._persist_profile("user", self.model.user_id, self.model._user_profile)

	    def set_bot_profile(self, updates: Dict[str, Any]) -> None:
	        """Merge ``updates`` into the bot profile and persist it when possible."""
	        self.model._bot_profile.update(updates)
	        self._persist_profile("bot", self.model.bot_id, self.model._bot_profile)

	    # ---- web module ----

	    @property
	    def web_module(self) -> Any:
	        """The model's ``_web`` attribute (may be ``None``)."""
	        return self.model._web

	    def web_search(self, query: str, deep_search: bool = False, **kwargs: Any) -> Any:
	        """
	        Run a single raw web search with no LLM planning/synthesis wrapped around it.

	        Calls ``model._attach_web()`` first when ``model._web`` is ``None``.
	        If ``search`` raises ``TypeError`` it is retried with ``query`` only.

	        Raises:
	            RuntimeError: If the web module is still ``None`` after attaching.
	        """
	        if self.model._web is None:
	            self.model._attach_web()
	        if self.model._web is None:
	            raise RuntimeError("Web module is unavailable.")
	        try:
	            return self.model._web.search(query, deep_search=deep_search, **kwargs)
	        except TypeError:
	            return self.model._web.search(query)

	    # ---- CodeBox ----

	    @property
	    def codebox(self) -> Any:
	        """Whatever ``model._get_codebox()`` returns."""
	        return self.model._get_codebox()

	    def run_code(
	        self,
	        code: str,
	        venv_id: str = "runner_default",
	        requirements: Optional[List[str]] = None,
	        timeout: int = 120,
	        max_ram_mb: int = 4096,
	        ensure_venv: bool = True,
	    ) -> Dict[str, Any]:
	        """
	        Execute ``code`` through the CodeBox.

	        When ``ensure_venv`` is true, ``create_venv`` is attempted first
	        (with ``requirements`` if given). A failure there is logged and
	        otherwise ignored, and ``run_code`` is still called.

	        Raises:
	            RuntimeError: If the CodeBox is unavailable.
	        """
	        cb = self.codebox
	        if cb is None:
	            raise RuntimeError("CodeBox is unavailable.")
	        if ensure_venv:
	            try:
	                if requirements:
	                    cb.create_venv(venv_id, requirements=requirements)
	                else:
	                    cb.create_venv(venv_id)
	            except Exception as exc:
	                # Most likely the venv already exists; keep going, but leave
	                # a trace so real setup failures are not invisible.
	                self._log(f"create_venv({venv_id!r}) did not complete: {exc}")
	        return cb.run_code(
	            code, venv_id=venv_id, requirements=None, timeout=timeout, max_ram_mb=max_ram_mb
	        )

	    # ---- lifecycle / housekeeping ----

	    def status(self) -> Dict[str, Any]:
	        """Return ``model.summary()`` plus load time and the warm-up report."""
	        s = self.model.summary()
	        s["load_seconds"] = round(self._load_seconds, 2)
	        s["warmup_report"] = self.warmup_report
	        return s

	    def reload_expert(self, expert_name: str) -> Any:
	        return self.model.reload_expert(expert_name)

	    def unload_expert(self, expert_name: str) -> None:
	        self.model.unload_expert(expert_name)

	    def unload_all(self) -> None:
	        self.model.unload_all()

	    def save(self, path: Optional[Union[str, Path]] = None) -> None:
	        """Save the model to ``path``, or back to ``checkpoint_path`` when omitted."""
	        self.model.save_checkpoint(str(path) if path else self.checkpoint_path)

	    def __repr__(self) -> str:
	        return f"PackedLLMRunner(checkpoint={self.checkpoint_path!r}, model={self.model!r})"

	if __name__ == "__main__":
	    # Manual smoke run: load PackedLLM.pt, print its status, then send the
	    # same long planning prompt through chat() once per thinking-flag combo.

	    TEST_PROMPT = """
	You are managing a disaster response operation.

	A hurricane has hit a coastal city of 850,000 people.

	Available resources:

	- 120 medical teams
	- 80 search-and-rescue teams
	- 60 engineering crews
	- 40 water purification units
	- 25 helicopters
	- 15 cargo aircraft

	Situation:

	- 3 hospitals are operating at 140% capacity.
	- 2 hospitals are offline.
	- 18 bridges are damaged.
	- 120,000 people are without drinking water.
	- 75,000 people require evacuation.
	- Fuel reserves will last 6 days.
	- Weather forecasts predict another storm in 72 hours.

	Requirements:

	1. Create a 7-day response plan.
	2. Prioritize all resource allocation.
	3. Identify the three largest risks.
	4. Estimate where bottlenecks will occur.
	5. Explain tradeoffs between evacuation, medical care, and infrastructure repair.
	6. Give a contingency plan if 30% of resources become unavailable.
	7. Output a final executive summary under 150 words.

	Think carefully and provide detailed reasoning.
	"""

	    banner = "=" * 80

	    bot = PackedLLMRunner("PackedLLM.pt", bot_id="pip", user_id="test_user")
	    try:
	        print(banner)
	        print("STATUS")
	        print(banner)
	        print(bot.status())

	        # (label, extra keyword arguments for chat()) pairs, run in order.
	        tests = [
	            ("BASELINE", {}),
	            ("FAST_THINK", {"fast_think": True}),
	            ("DEEP_THINK", {"deep_think": True}),
	            ("FAST+DEEP", {"fast_think": True, "deep_think": True}),
	        ]
	        for name, kwargs in tests:
	            print("\n" + banner)
	            print(name)
	            print(banner)
	            response = bot.chat(TEST_PROMPT, **kwargs)
	            print(response)
	    finally:
	        # Unloading is best-effort; a failure here must not mask the real error.
	        with contextlib.suppress(Exception):
	            bot.unload_all()