Spaces:

Paulhayes
/

HiveEnglishLanguageTutor

Runtime error

App Files Files Community

HiveEnglishLanguageTutor / app.py

Paulhayes

Update app.py

2251977 verified 3 months ago

raw

history blame contribute delete

142 kB

	#!/usr/bin/env python3
	# --- BEGIN MEMORY MANIFEST (auto-updated) ---
	# (This block is auto-written by Hive to record what datasets/files
	# have already been converted into memory (curves). Do not edit by hand.)
	MEMORY_MANIFEST = {
	"updated_ts": 0,
	"datasets_done": [],
	"vectors_total": 0,
	"notes": "Set HIVE_ALLOW_SELF_WRITE_MANIFEST=0 to stop auto-updates."
	}
	# --- END MEMORY MANIFEST ---

	# -*- coding: utf-8 -*-
	# HIVE 🐝 FULL MERGED ALL-IN-ONE OPTIMIZED
	# Offline-first + Online updates + Auto Wi-Fi + RBAC + Multilingual Voice (ASR/TTS + Phonics)
	# + Internal Optimization Stack (Change Manager: propose ➡️ sandbox ➡️ A/B test ➡️ apply/rollback with Owner policy)
	# Upload this single file and requirements.txt to a Hugging Face Space (or run locally).
	# - python hive_full_merged_all_in_one_optimized.py

	import os, sys, re, json, time, shutil, tempfile, subprocess, platform, socket, threading, importlib, hashlib, unicodedata, urllib.request, base64
	from dataclasses import dataclass
	from typing import Optional, List, Dict, Tuple

	# ----------- light bootstrap (safe) -----------
	def _ensure(pkgs):
	for p in pkgs:
	mod = p.split("==")[0].split(">=")[0].split("<=")[0].split("[")[0]
	try:
	importlib.import_module(mod)
	except Exception:
	try:
	subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", p])
	except Exception:
	pass

	# Install any missing runtime dependency before the third-party imports
	# below are attempted. Runs at import time on every start.
	_ensure(["numpy>=1.24.0","psutil>=5.9.0","requests>=2.31.0","gradio>=4.44.0","sentence-transformers>=3.0.0","faiss-cpu>=1.8.0",
	"transformers>=4.44.0","accelerate>=0.33.0","datasets>=2.21.0","soundfile>=0.12.1","faster-whisper>=1.0.0","langid>=1.1.6",
	"piper-tts>=1.2.0","g2p_en>=2.1.0","librosa>=0.10.1","scikit-learn>=1.1.0","feedparser>=6.0.11","duckduckgo_search>=6.2.10",
	"keyring>=24.3.1"])

	import numpy as np, psutil, requests, feedparser, langid, librosa, gradio as gr, soundfile as sf
	from sentence_transformers import SentenceTransformer
	from duckduckgo_search import DDGS
	from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
	from faster_whisper import WhisperModel
	from piper.voice import PiperVoice
	from g2p_en import G2p
	from sklearn.metrics.pairwise import cosine_similarity

	try:
	import torch
	except Exception:
	torch=None

	try:
	import faiss
	except Exception:
	subprocess.check_call([sys.executable,"-m","pip","install","--upgrade","faiss-cpu>=1.8.0"])
	import faiss

	# Optional vision
	try:
	import cv2; _HAVE_CV=True
	except Exception:
	_HAVE_CV=False
	try:
	from PIL import Image
	import pytesseract; _HAVE_TESS=True and _HAVE_CV
	except Exception:
	_HAVE_TESS=False

	try:
	import keyring
	except Exception:
	keyring=None

	# ----------------------- config -----------------------
	def ENV(name, default=None, cast=str):
	    """Read environment variable *name* and coerce it to *cast*.

	    Returns ``None`` when the variable is unset and *default* is ``None``.
	    ``bool`` accepts 1/true/yes/on (case-insensitive). ``int`` also accepts
	    float-looking text such as ``"7.0"``. ``float`` is converted with
	    ``float()``; previously it fell through and returned the raw string.
	    Any other *cast* returns the value unchanged.
	    """
	    v = os.getenv(name, default)
	    if v is None:
	        return None
	    if cast is bool:
	        return str(v).lower() in ("1", "true", "yes", "on")
	    if cast is int:
	        try:
	            return int(v)
	        except (ValueError, TypeError):
	            return int(float(v))
	    if cast is float:
	        return float(v)
	    return v

	# Runtime configuration, resolved once at import time from environment
	# variables through ENV(). Keys are read throughout the file as CFG["..."].
	CFG={
	# auto-archive memory to curves.tar.gz
	"HIVE_AUTO_ARCHIVE": ENV("HIVE_AUTO_ARCHIVE", "1", bool),
	"HIVE_AUTO_ARCHIVE_MODE": ENV("HIVE_AUTO_ARCHIVE_MODE", "per_chain", str), # per_chain | per_dataset
	"HIVE_ARCHIVE_PATH": ENV("HIVE_ARCHIVE_PATH", "curves.tar.gz", str),
	# staged ingestion chaining (auto-run multiple stages this boot)
	"HIVE_INGEST_CHAIN": ENV("HIVE_INGEST_CHAIN", "1", bool),
	"HIVE_INGEST_CHAIN_MAX": ENV("HIVE_INGEST_CHAIN_MAX", "2", int), # max stages per boot
	# staged ingestion controls
	"HIVE_INGEST_STAGED": ENV("HIVE_INGEST_STAGED", "1", bool),
	"HIVE_INGEST_STAGE_SIZE": ENV("HIVE_INGEST_STAGE_SIZE", "3", int),
	"HIVE_INGEST_MIN_FREE_GB": ENV("HIVE_INGEST_MIN_FREE_GB", "8", int),
	"HIVE_INGEST_NEXT": ENV("HIVE_INGEST_NEXT", "0", bool), # run one stage this boot

	# self-edit manifest controls
	"HIVE_ALLOW_SELF_WRITE_MANIFEST": ENV("HIVE_ALLOW_SELF_WRITE_MANIFEST", "1", bool),
	"HIVE_SELF_WRITE_FILE": ENV("HIVE_SELF_WRITE_FILE", "", str),

	# memory auto-restore controls (admin memory)
	"HIVE_CURVES_AUTO_RESTORE": ENV("HIVE_CURVES_AUTO_RESTORE", "1", bool),
	"CURVES_ARCHIVE_LOCAL": ENV("HIVE_CURVES_ARCHIVE_LOCAL", "curves.tar.gz", str),
	"CURVES_ARCHIVE_URL": ENV("HIVE_CURVES_ARCHIVE_URL", "", str),
	"CURVES_HF_DATASET": ENV("HIVE_CURVES_HF_DATASET", "", str),
	"CURVES_HF_SUBPATH": ENV("HIVE_CURVES_HF_SUBPATH", "", str),
	"HF_READ_TOKEN": ENV("HF_READ_TOKEN", "", str),

	# memory directory alias
	# (MEMORY_DIR and CURVE_DIR both read the same variable, HIVE_CURVE_DIR)
	"MEMORY_DIR": ENV("HIVE_CURVE_DIR", "./curves"),
	"CURVE_DIR": ENV("HIVE_CURVE_DIR","./curves"),
	"STATE_DIR": ENV("HIVE_STATE_DIR","./state"),
	"LAUNCH_UI": ENV("HIVE_LAUNCH_UI","1",bool),
	"LLM_AUTOSIZE": ENV("HIVE_LLM_AUTOSIZE","1",bool),
	"LLM_MAX_VRAM_GB": ENV("HIVE_LLM_MAX_VRAM_GB","0", int),
	"MODEL_OVERRIDE": ENV("HIVE_MODEL_ID",""),
	"CTX_TOKENS": ENV("HIVE_CTX_TOKENS","2048",int),
	# NOTE(review): the Owner fallback credentials below are hard-coded in
	# source. Anyone who can read this file can authenticate as Owner unless
	# HIVE_OWNER_PASS / HIVE_OWNER_SECOND are set. Rotate these values and
	# require the environment variables.
	"OWNER_NAME": ENV("HIVE_OWNER_USER","Rose"), # Default Owner name
	"OWNER_PASS": ENV("HIVE_OWNER_PASS","Fehr2008"), # Default Owner password
	"OWNER_SECOND": ENV("HIVE_OWNER_SECOND","Paulbear01"),
	"AGENT_NAME": ENV("HIVE_AGENT_NAME","Hive"),
	"NO_PROFANITY": ENV("HIVE_NO_PROFANITY","1",bool),
	"ASR_SIZE": ENV("HIVE_ASR_SIZE","small"),
	"TTS_LANG": ENV("HIVE_TTS_LANG","en"),
	"BOOTSTRAP_INGEST": ENV("HIVE_BOOTSTRAP_INGEST","1",bool),
	"FORCE_REINGEST": ENV("HIVE_FORCE_REINGEST","0",bool),
	"INGEST_SOURCES": ENV("HIVE_INGEST_SOURCES",""),
	"ONLINE_ENABLE": ENV("HIVE_ONLINE_ENABLE","1",bool),
	"ONLINE_AUTO": ENV("HIVE_ONLINE_AUTO","0",bool),
	"ONLINE_SOURCES": ENV("HIVE_ONLINE_SOURCES","https://hnrss.org/frontpage,https://rss.nytimes.com/services/xml/rss/nyt/World.xml"),
	"ONLINE_TIMEOUT": ENV("HIVE_ONLINE_TIMEOUT","8",int),
	"ONLINE_MAX_RESULTS": ENV("HIVE_ONLINE_MAX_RESULTS","5",int),
	"ONLINE_TRIGGER": ENV("HIVE_ONLINE_TRIGGER","auto",str),
	# bounded self governance
	"HIVE_USE_HF_INFERENCE": ENV("HIVE_USE_HF_INFERENCE","0",bool),
	"HIVE_HF_ENDPOINT": ENV("HIVE_HF_ENDPOINT","",str),
	"ALLOW_SELF_REBOOT": ENV("HIVE_ALLOW_SELF_REBOOT","1",bool),
	"ALLOW_RUNTIME_HOTPATCH": ENV("HIVE_ALLOW_RUNTIME_HOTPATCH","1",bool),
	"AUTO_SELF_OPTIMIZE": ENV("HIVE_AUTO_SELF_OPTIMIZE","1",bool),
	# internal optimization with sandbox + A/B (Owner policy)
	"OPT_ENABLE": ENV("HIVE_OPT_ENABLE","1",bool),
	"OPT_AUTO_APPLY": ENV("HIVE_OPT_AUTO_APPLY","0",bool), # OWNER MAY SET TO 1
	# The two allowlists are comma-separated strings split into lists here.
	"OPT_PKG_ALLOWLIST": ENV("HIVE_OPT_PKG_ALLOWLIST","transformers,accelerate,datasets,sentence-transformers,faiss-cpu,duckduckgo_search,feedparser,requests,gradio").split(","),
	"OPT_MODEL_ALLOWLIST": ENV("HIVE_OPT_MODEL_ALLOWLIST","meta-llama/Meta-Llama-3.1-8B-Instruct,meta-llama/Meta-Llama-3.1-70B-Instruct,TinyLlama/TinyLlama-1.1B-Chat-v1.0").split(","),
	"OPT_THRESH_LATENCY_MS": ENV("HIVE_OPT_THRESH_LATENCY_MS","0",int),
	"OPT_THRESH_TOKS_PER_S": ENV("HIVE_OPT_THRESH_TOKS_PER_S","0",float),
	"OPT_THRESH_QUALITY": ENV("HIVE_OPT_THRESH_QUALITY","0.02",float),
	"OPT_SANDBOX_TIMEOUT": ENV("HIVE_OPT_SANDBOX_TIMEOUT","180",int),
	}
	# Create the memory and state directories up front; later code assumes they exist.
	os.makedirs(CFG["CURVE_DIR"], exist_ok=True)
	os.makedirs(CFG["STATE_DIR"], exist_ok=True)

	# Paths under STATE_DIR used by the runtime overlay and the optimization stack.
	OVERLAY_DIR = os.path.join(CFG["STATE_DIR"], "runtime_overlay")
	RUNTIME_OVERRIDES = os.path.join(CFG["STATE_DIR"], "runtime_overrides.json")
	OPT_DIR = os.path.join(CFG["STATE_DIR"], "opt")
	OPT_PROPOSALS = os.path.join(OPT_DIR, "proposals.jsonl")
	OPT_RESULTS = os.path.join(OPT_DIR, "results.jsonl")
	# Only the two directories are created here; the JSON/JSONL files are created on first write.
	for p in (OVERLAY_DIR, OPT_DIR):
	os.makedirs(p, exist_ok=True)

	# ----------------- sensing / model pick -----------------
	def _has_gpu_env()->bool:
	accel=os.getenv("SPACE_ACCELERATOR","").lower()
	if accel in ("t4","a10","a100","l4","l40","h100"): return True
	try: return torch is not None and torch.cuda.is_available()
	except Exception: return False

	def probe_caps():
	    """Measure free disk and available RAM and derive ingestion limits.

	    Returns a dict with ``free_gb``, ``ram_gb``, ``gpu``, ``max_docs`` and
	    ``batch``; the last two step up at more than 8 GB and more than 16 GB
	    of available RAM.
	    """
	    gib = 1024 ** 3
	    free_gb = shutil.disk_usage(".").free / gib
	    ram_gb = psutil.virtual_memory().available / gib
	    if ram_gb > 16:
	        max_docs, batch = 70000, 512
	    elif ram_gb > 8:
	        max_docs, batch = 50000, 256
	    else:
	        max_docs, batch = 12000, 64
	    return {
	        "free_gb": free_gb,
	        "ram_gb": ram_gb,
	        "gpu": _has_gpu_env(),
	        "max_docs": max_docs,
	        "batch": batch,
	    }

	# (model id, size threshold in GB) pairs, smallest first. pick_model() walks
	# this list in reverse and compares the threshold with LLM_MAX_VRAM_GB;
	# a threshold of 0 is never selected on the GPU path.
	CANDIDATES=[
	("TinyLlama/TinyLlama-1.1B-Chat-v1.0", 0),
	("meta-llama/Meta-Llama-3.1-8B-Instruct",12),
	("meta-llama/Meta-Llama-3.1-70B-Instruct",100)
	]
	def _detected_vram_gb() -> float:
	    """Return total CUDA memory across visible devices in GB, or 0.0 if unknown."""
	    try:
	        if torch is None or not torch.cuda.is_available():
	            return 0.0
	        total_bytes = sum(
	            torch.cuda.get_device_properties(i).total_memory
	            for i in range(torch.cuda.device_count())
	        )
	        return total_bytes / (1024 ** 3)
	    except Exception:
	        return 0.0


	def pick_model() -> Tuple[str, dict]:
	    """Choose the LLM id and device settings for this host.

	    ``MODEL_OVERRIDE`` always wins. On a GPU host the largest candidate whose
	    threshold fits the VRAM budget is chosen. The budget is
	    ``LLM_MAX_VRAM_GB`` when set; when it is 0 and ``LLM_AUTOSIZE`` is on,
	    the budget is the VRAM reported by torch. Previously 0 meant "no limit",
	    so any GPU host selected the 70B model. If VRAM cannot be detected the
	    old "no limit" behaviour is kept. Everything else falls back to
	    TinyLlama on CPU, which is what the old CPU branch always returned.
	    """
	    if CFG["MODEL_OVERRIDE"]:
	        return CFG["MODEL_OVERRIDE"], {"device": "cuda" if _has_gpu_env() else "cpu"}
	    if _has_gpu_env():
	        budget_gb = CFG["LLM_MAX_VRAM_GB"]
	        if not budget_gb and CFG.get("LLM_AUTOSIZE", True):
	            budget_gb = _detected_vram_gb()
	        for model_id, need_gb in reversed(CANDIDATES):
	            # need_gb == 0 marks the CPU-sized model; skip it on the GPU path.
	            if need_gb and (not budget_gb or need_gb <= budget_gb):
	                return model_id, {"device": "cuda"}
	    return "TinyLlama/TinyLlama-1.1B-Chat-v1.0", {"device": "cpu"}

	# ----------------- embeddings / curves -----------------
	_EMB_ID = os.environ.get("HIVE_EMB_ID", "sentence-transformers/all-MiniLM-L6-v2")


	class GEC:
	    """Thin wrapper around the sentence-embedding model named by ``_EMB_ID``."""

	    def __init__(self):
	        self.model = SentenceTransformer(_EMB_ID)

	    def encode(self, texts: List[str]):
	        """Embed *texts* with the wrapped model, asking for normalised vectors."""
	        embeddings = self.model.encode(texts, normalize_embeddings=True)
	        return embeddings

	class CurveStore:
	    """FAISS inner-product index plus a JSONL sidecar holding one metadata row per vector.

	    Row ``i`` of ``meta.jsonl`` describes vector ``i`` of ``faiss.index``, so
	    the two files must always grow together.
	    """

	    def __init__(self, d):
	        self.dir = d
	        os.makedirs(d, exist_ok=True)
	        self.idx_path = os.path.join(d, "faiss.index")
	        self.meta_path = os.path.join(d, "meta.jsonl")
	        self.dim = 384
	        self.gec = GEC()
	        if os.path.exists(self.idx_path):
	            self.index = faiss.read_index(self.idx_path)
	        else:
	            self.index = faiss.IndexFlatIP(self.dim)

	    def _meta_lines(self) -> List[str]:
	        """Return the metadata rows, or an empty list if none were written yet."""
	        try:
	            with open(self.meta_path, "r", encoding="utf-8") as f:
	                return f.read().splitlines()
	        except FileNotFoundError:
	            return []

	    def add_texts(self, docs: List[str], metas: List[Dict]):
	        """Embed *docs*, append them to the index and persist index and metadata.

	        Raises ValueError when *docs* and *metas* differ in length, because
	        that would silently shift every later metadata row.
	        """
	        if not docs:
	            return
	        if len(docs) != len(metas):
	            raise ValueError("docs and metas must have the same length")
	        vecs = np.asarray(self.gec.encode(docs), dtype="float32")
	        self.index.add(vecs)
	        with open(self.meta_path, "a", encoding="utf-8") as f:
	            for m in metas:
	                f.write(json.dumps(m, ensure_ascii=False) + "\n")
	        faiss.write_index(self.index, self.idx_path)

	    def search(self, query: str, k: int = 6) -> List[Dict]:
	        """Return the metadata of up to *k* nearest entries for *query*."""
	        return self.search_with_scores(query, k)[0]

	    def search_with_scores(self, query: str, k: int = 6):
	        """Return ``(metas, scores)`` for up to *k* nearest entries.

	        Scores are clamped to [0, 1]. Hits whose index is out of range or
	        whose metadata row is not valid JSON are skipped, and a skipped hit
	        never leaves the two lists with different lengths.
	        """
	        if self.index.ntotal == 0:
	            return [], []
	        qv = np.asarray(self.gec.encode([query]), dtype="float32")
	        D, I = self.index.search(qv, k)
	        lines = self._meta_lines()
	        metas, scores = [], []
	        for idx, sc in zip(I[0], D[0]):
	            if not 0 <= idx < len(lines):
	                continue
	            try:
	                meta = json.loads(lines[idx])
	                score = float(max(0.0, min(1.0, sc if sc is not None else 0.0)))
	            except (ValueError, TypeError):
	                continue
	            metas.append(meta)
	            scores.append(score)
	        return metas, scores

	# Marker file inside the configured CURVE_DIR; written by _mark_offline_ready()
	# and consulted by _curves_ready().
	OFFLINE_MARK = os.path.join(CFG["CURVE_DIR"], ".offline_ready")
	def _curves_ready(curve_dir: str) -> bool:
	    """Tell whether an offline memory index is available.

	    The marker file is checked first: its ``ok`` field decides, and a marker
	    that cannot be read or parsed still counts as ready. Without a marker
	    the FAISS index under *curve_dir* must exist and contain vectors.
	    """
	    if os.path.exists(OFFLINE_MARK):
	        try:
	            with open(OFFLINE_MARK, "r", encoding="utf-8") as f:
	                return json.load(f).get("ok", True)
	        except Exception:
	            # Deliberately lenient: the marker's existence is the signal.
	            return True
	    idx = os.path.join(curve_dir, "faiss.index")
	    if os.path.exists(idx):
	        try:
	            return faiss.read_index(idx).ntotal > 0
	        except Exception:
	            return False
	    return False
	def _mark_offline_ready():
	    """Write the offline-ready marker with the current timestamp (best effort)."""
	    try:
	        with open(OFFLINE_MARK, "w", encoding="utf-8") as f:
	            json.dump({"ok": True, "ts": time.time()}, f)
	    except Exception:
	        # A missing marker only makes _curves_ready() fall back to the index check.
	        pass

	# ----------- HF Datasets bootstrap -----------
	# Hugging Face dataset ids ingested by ingest_all() when no explicit source
	# list is passed.
	DEFAULT_SOURCES=["jhu-clsp/jflue","bea2019st/wi_locness","fce-m2109/mascorpus","rajpurkar/squad_v2",
	"OpenRL/daily_dialog","tetti/spelling-dataset-extended","Helsinki-NLP/opus-100","facebook/flores",
	"HuggingFaceH4/no_robots","bigscience/xP3","allenai/sciq","allenai/c4",
	"mozilla-foundation/common_voice_17_0","bene-ges/en_cmudict","openslr/librispeech_asr","conceptnet5/conceptnet5","grammarly/coedit"]

	def _iter_text(dataset_name: str, split="train"):
	    """Yield ``{"text": ...}`` records from a Hugging Face dataset split.

	    Streaming is tried first. The text is taken from the first non-empty
	    field among text/sentence/content/question, then from a ``translation``
	    dict, and finally from the stringified example.
	    """
	    from datasets import load_dataset

	    try:
	        records = load_dataset(dataset_name, split=split, streaming=True)
	    except Exception:
	        # NOTE(review): trust_remote_code=True lets the dataset repository run
	        # its own loading script on this machine; confirm this is intended
	        # for every configured source.
	        records = load_dataset(dataset_name, split=split, trust_remote_code=True)

	    for ex in records:
	        text = None
	        for field in ("text", "sentence", "content", "question"):
	            text = ex.get(field)
	            if text:
	                break
	        if not text:
	            translation = ex["translation"] if "translation" in ex else None
	            if isinstance(translation, dict):
	                # NOTE(review): "\|" is not a valid escape, so the separator
	                # contains a literal backslash; confirm that is wanted.
	                text = " \| ".join(
	                    f"{k}:{v}" for k, v in translation.items() if isinstance(v, str)
	                )
	            else:
	                text = str(ex)
	        yield {"text": str(text)}

	def _plan_order(srcs: List[str])->List[str]:
	first=["jhu-clsp/jflue","bea2019st/wi_locness","fce-m2109/mascorpus","rajpurkar/squad_v2","OpenRL/daily_dialog","tetti/spelling-dataset-extended"]
	ordered=[s for s in first if s in srcs]
	for s in srcs:
	if s not in ordered: ordered.append(s)
	return ordered

	class LibrarianCurve:
	    """Scope-aware front end for a curve store."""

	    def __init__(self, store):
	        self.store = store

	    def ingest_pairs(self, texts, metas, scope):
	        """Store *texts*, tagging each metadata copy with *scope* and a 500-char text preview."""
	        scoped = [
	            {**meta, "scope": scope, "text": text[:500]}
	            for meta, text in zip(metas, texts)
	        ]
	        self.store.add_texts(texts, scoped)

	    def retrieve_scoped_with_scores(self, query, effective_role, caller_id, k=6):
	        """Return up to *k* ``(items, scores)`` the caller is allowed to see.

	        The owner sees everything. Everyone else sees the ``general`` scope
	        plus their own ``user:<caller_id>`` scope. The store is over-fetched
	        (k*4) so filtering can still fill *k* results.
	        """
	        items, scores = self.store.search_with_scores(query, k=k * 4)
	        if effective_role == "owner":
	            return items[:k], scores[:k]
	        allowed = {"general", f"user:{caller_id}"} if caller_id else {"general"}
	        kept_items, kept_scores = [], []
	        for item, score in zip(items, scores):
	            if item.get("scope", "general") not in allowed:
	                continue
	            kept_items.append(item)
	            kept_scores.append(score)
	            if len(kept_items) >= k:
	                break
	        return kept_items, kept_scores

	def ingest_all(curve_dir: str, sources: Optional[List[str]] = None, scope="general"):
	    """Ingest every dataset in *sources* into the store at *curve_dir*.

	    Records are embedded in batches sized by probe_caps(). Each dataset
	    stops once ``max_docs`` records are stored. One line per dataset is
	    appended to ``ingest_log.jsonl``, either the ingested count or the
	    error that aborted it. Returns the total number of records stored.
	    """
	    caps = probe_caps()
	    store = CurveStore(curve_dir)
	    lib = LibrarianCurve(store)
	    os.makedirs(curve_dir, exist_ok=True)
	    logf = os.path.join(curve_dir, "ingest_log.jsonl")
	    count_total = 0
	    sources = sources or DEFAULT_SOURCES

	    def _log(entry):
	        with open(logf, "a", encoding="utf-8") as f:
	            f.write(json.dumps(entry) + "\n")

	    for ds in _plan_order(sources):
	        count = 0
	        texts, metas = [], []
	        try:
	            for rec in _iter_text(ds):
	                txt = (rec.get("text") or "").strip()
	                if not txt:
	                    continue
	                texts.append(txt)
	                metas.append({"dataset": ds, "text": txt[:500]})
	                if len(texts) < caps["batch"]:
	                    continue
	                lib.ingest_pairs(texts, metas, scope)
	                count += len(texts)
	                count_total += len(texts)
	                texts, metas = [], []
	                if count >= caps["max_docs"]:
	                    break
	            if texts:
	                # Flush the final partial batch.
	                lib.ingest_pairs(texts, metas, scope)
	                count += len(texts)
	                count_total += len(texts)
	                texts, metas = [], []
	            _log({"dataset": ds, "ingested": count})
	        except Exception as e:
	            _log({"dataset": ds, "error": str(e)})
	    return count_total

	# ----------- live search + RSS ➡️ curves -----------
	# JSON file under STATE_DIR; by its name it records online items already seen.
	# NOTE(review): nothing in this part of the file reads or writes it — confirm usage.
	ONLINE_DB=os.path.join(CFG["STATE_DIR"],"online_seen.json")
	def _load_json(path, default):
	if os.path.exists(path):
	try: return json.load(open(path,"r",encoding="utf-8"))
	except Exception: return default
	return default
	def _save_json(path, data): json.dump(data, open(path,"w",encoding="utf-8"), indent=2)

	def online_available(timeout: int) -> bool:
	    """Return True when a GET to huggingface.co completes within *timeout* seconds."""
	    try:
	        requests.get("https://huggingface.co", timeout=timeout)
	    except Exception:
	        return False
	    return True

	def _hash(s:str)->str:
	return hashlib.sha1(s.encode("utf-8","ignore")).hexdigest()

	def fetch_rss(urls: List[str], timeout: int = 8, limit: int = 50) -> List[Dict]:
	    """Collect up to *limit* entries from each feed in *urls*.

	    HTTP(S) feeds are downloaded with ``requests`` so *timeout* is enforced;
	    before, the parameter was accepted but never used. Other sources such
	    as local paths are handed straight to feedparser. A feed that fails is
	    skipped and the remaining feeds are still processed.
	    """
	    items = []
	    for u in urls:
	        try:
	            if u.lower().startswith(("http://", "https://")):
	                resp = requests.get(u, timeout=timeout)
	                resp.raise_for_status()
	                feed = feedparser.parse(resp.content)
	            else:
	                feed = feedparser.parse(u)
	            for e in feed.entries[:limit]:
	                items.append({
	                    "title": e.get("title", ""),
	                    "link": e.get("link", ""),
	                    "summary": e.get("summary") or e.get("description", ""),
	                    "published": e.get("published") or e.get("updated", ""),
	                    "source": u,
	                })
	        except Exception:
	            # Best effort: one broken feed must not block the others.
	            continue
	    return items

	def web_search_snippets(query: str, max_results: int = 5, timeout: int = 8) -> list:
	    """Return DuckDuckGo text hits that have a body, as title/href/body dicts.

	    Any search failure is swallowed; results gathered before the failure
	    are still returned.
	    """
	    found = []
	    try:
	        with DDGS(timeout=timeout) as client:
	            for hit in client.text(query, max_results=max_results):
	                if not hit or not hit.get("body"):
	                    continue
	                found.append({key: hit.get(key, "") for key in ("title", "href", "body")})
	    except Exception:
	        # consider logging this error
	        pass
	    return found

	# ----------- RBAC / users / lockouts -----------
	# JSON state files under STATE_DIR: user profiles, login lockout counters,
	# enrolled voice embeddings and per-user speech adaptation data.
	USERS_DB=os.path.join(CFG["STATE_DIR"],"users.json")
	LOCKS_DB=os.path.join(CFG["STATE_DIR"],"lockouts.json")
	VOICES_DB=os.path.join(CFG["STATE_DIR"],"voices.json")
	ADAPT_DB=os.path.join(CFG["STATE_DIR"],"speech_adapt.json")

	def _init_users():
	    """Create, persist and return the initial user database with only the Owner in it."""
	    # NOTE(review): the password and second factor are stored in plaintext in
	    # users.json; consider storing salted hashes instead.
	    owner = {
	        "id": "owner:1",
	        "name": CFG["OWNER_NAME"],
	        "role": "owner",
	        "pass": CFG["OWNER_PASS"],
	        "second": CFG["OWNER_SECOND"],
	        "prefs": {"activation_names": [CFG["AGENT_NAME"]], "language": "en"},
	    }
	    d = {"owner": owner, "admins_super": [], "admins_general": [], "users": []}
	    _save_json(USERS_DB, d)
	    return d
	def _load_users():
	    """Return the stored user database, creating the initial one when none exists or it is empty."""
	    stored = _load_json(USERS_DB, None)
	    if stored:
	        return stored
	    return _init_users()
	def _find_user(d, name_or_id):
	pools=[("owner",[d.get("owner")]),("admin_super",d["admins_super"]),("admin_general",d["admins_general"]),("user",d["users"])]
	for role,pool in pools:
	for u in pool or []:
	if u and (u.get("id")==name_or_id or u.get("name")==name_or_id): return u, role
	return None, None

	# Static role -> capability table.
	# NOTE(review): nothing in this part of the file reads PERMS; enforcement
	# presumably happens further down — confirm.
	PERMS={
	"owner":{"can_add":["admin_super","admin_general","user"],"can_remove":["admin_super","admin_general","user"],
	"can_edit_role_of":["admin_super","admin_general","user"],"can_edit_profile_of":["owner","admin_super","admin_general","user"],
	"can_view_scopes":"all","maintenance":"full","code_edit":"approve_and_edit"},
	"admin_super":{"can_add":["admin_general","user"],"can_remove":["admin_general","user"],
	"can_edit_role_of":["admin_general","user"],"can_edit_profile_of":["admin_general","user"],
	"can_view_scopes":"self_only","maintenance":"advanced","code_edit":"suggest_only"},
	"admin_general":{"can_add":["user"],"can_remove":["user"],"can_edit_role_of":["user"],"can_edit_profile_of":["user"],
	"can_view_scopes":"self_only","maintenance":"basic","code_edit":"suggest_only"},
	"user":{"can_add":[],"can_remove":[],"can_edit_role_of":[],"can_edit_profile_of":["user"],
	"can_view_scopes":"self_only","maintenance":"none","code_edit":"none"},
	"guest":{"can_add":[],"can_remove":[],"can_edit_role_of":[],"can_edit_profile_of":[],
	"can_view_scopes":"self_only","maintenance":"none","code_edit":"none"},
	}

	def attempt_login(name_or_id: str, password: str = "", second: Optional[str] = None):
	    """Authenticate a profile and return ``(ok, message)``.

	    Owners need the password and, when one is stored, the second factor.
	    Admins need the password. Plain users are admitted without a password.
	    From the third consecutive failure on, the profile is locked for 180 s.

	    The role comes from the profile's ``role`` field and falls back to the
	    pool the profile was found in. Before, an admin record without a
	    ``role`` field was treated as a plain user and skipped the password
	    check. Secrets are compared in constant time.
	    """
	    import hmac

	    def _same(given, expected) -> bool:
	        # Constant-time comparison; non-string values never match.
	        if not isinstance(given, str) or not isinstance(expected, str):
	            return False
	        return hmac.compare_digest(given.encode("utf-8"), expected.encode("utf-8"))

	    d = _load_users()
	    locks = _load_json(LOCKS_DB, {})

	    def lock_fail(lid, msg):
	        st = locks.get(lid, {"fails": 0, "until": 0})
	        st["fails"] = st.get("fails", 0) + 1
	        dur = 180 if st["fails"] >= 3 else 0
	        st["until"] = time.time() + dur if dur else 0
	        locks[lid] = st
	        _save_json(LOCKS_DB, locks)
	        return False, msg

	    u, pool_role = _find_user(d, name_or_id)
	    if not u:
	        return False, "Profile not found."
	    role = u.get("role") or pool_role or "user"
	    lid = u.get("id", u.get("name"))
	    now = time.time()
	    st = locks.get(lid, {"fails": 0, "until": 0})
	    if now < st.get("until", 0):
	        return False, f"Locked; try again in ~{int(st['until']-now)}s."
	    if role == "owner":
	        stored_second = u.get("second")
	        if not _same(password, u.get("pass")) or (stored_second and not _same(second, stored_second)):
	            return lock_fail(lid, "Owner credentials incorrect.")
	    elif role in ("admin_general", "admin_super"):
	        if not _same(password, u.get("pass")):
	            return lock_fail(lid, "Admin password incorrect.")
	    # Success clears the failure counter.
	    locks[lid] = {"fails": 0, "until": 0}
	    _save_json(LOCKS_DB, locks)
	    return True, f"Welcome, {u.get('name')} ({role})."

	# ----------- voice: ASR/TTS/phonics -----------
	# Shared grapheme-to-phoneme converter, built once at import time and used by phonics().
	G2P = G2p()
	# Accepted ASR_SIZE values and the model name each one maps to.
	ASR_MODELS={"tiny":"tiny","base":"base","small":"small","medium":"medium","large-v3":"large-v3"}
	def _asr_model_name():
	    """Return the model name for the configured ASR size, defaulting to ``"small"``."""
	    requested = CFG["ASR_SIZE"]
	    if requested in ASR_MODELS:
	        return ASR_MODELS[requested]
	    return "small"
	_ASR = None


	def get_asr():
	    """Return the process-wide Whisper model, creating it on first use.

	    float16 on CUDA when a GPU is expected, int8 on CPU otherwise.
	    """
	    global _ASR
	    if _ASR is None:
	        size = _asr_model_name()
	        on_gpu = _has_gpu_env()
	        _ASR = WhisperModel(
	            size,
	            device="cuda" if on_gpu else "cpu",
	            compute_type="float16" if on_gpu else "int8",
	        )
	    return _ASR

	# Language code -> (voice model URL, voice config URL). get_tts() downloads
	# both files on first use and falls back to "en" for unknown languages.
	PIPER_MODELS={
	"en": ("https://github.com/rhasspy/piper/releases/download/v0.0.2/en_US-amy-low.onnx",
	"https://github.com/rhasspy/piper/releases/download/v0.0.2/en_US-amy-low.onnx.json"),
	"es": ("https://github.com/rhasspy/piper/releases/download/v0.0.2/es_ES-davefx-medium.onnx",
	"https://github.com/rhasspy/piper/releases/download/v0.0.2/es_ES-davefx-medium.onnx.json"),
	"fr": ("https://github.com/rhasspy/piper/releases/download/v0.0.2/fr_FR-gilles-medium.onnx",
	"https://github.com/rhasspy/piper/releases/download/v0.0.2/fr_FR-gilles-medium.onnx.json"),
	"de": ("https://github.com/rhasspy/piper/releases/download/v0.0.2/de_DE-thorsten-low.onnx",
	"https://github.com/rhasspy/piper/releases/download/v0.0.2/de_DE-thorsten-low.onnx.json"),
	"zh": ("https://github.com/rhasspy/piper/releases/download/v0.0.2/zh_CN-huayan-low.onnx",
	"https://github.com/rhasspy/piper/releases/download/v0.0.2/zh_CN-huayan-low.onnx.json"),
	}
	def _download(url,dst, timeout=30):
	if os.path.exists(dst): return dst
	os.makedirs(os.path.dirname(dst),exist_ok=True); urllib.request.urlretrieve(url,dst); return dst # TODO: add timeout
	_TTS_CACHE = {}


	def get_tts(lang="en") -> PiperVoice:
	    """Return a cached Piper voice for *lang*, downloading its files on first use.

	    Languages missing from PIPER_MODELS fall back to English.
	    """
	    code = lang if lang in PIPER_MODELS else "en"
	    if code in _TTS_CACHE:
	        return _TTS_CACHE[code]
	    model_url, config_url = PIPER_MODELS[code]
	    model_path = _download(model_url, f"./models/piper/{os.path.basename(model_url)}")
	    config_path = _download(config_url, f"./models/piper/{os.path.basename(config_url)}")
	    voice = PiperVoice.load(model_path, config_path)
	    _TTS_CACHE[code] = voice
	    return voice

	def _embed_mfcc(path) -> np.ndarray:
	    """Return a 20-value voice fingerprint: the per-coefficient mean of the MFCCs of the audio at *path* (loaded at 16 kHz)."""
	    samples, rate = librosa.load(path, sr=16000)
	    coefficients = librosa.feature.mfcc(y=samples, sr=rate, n_mfcc=20)
	    return coefficients.mean(axis=1)
	def enroll_voice(uid: str, path: str) -> bool:
	    """Store the MFCC fingerprint of the audio at *path* under *uid*; always returns True."""
	    voices = _load_json(VOICES_DB, {})
	    fingerprint = _embed_mfcc(path).astype(float)
	    voices[uid] = fingerprint.tolist()
	    _save_json(VOICES_DB, voices)
	    return True
	def identify_voice(path: str, threshold: float = 0.70) -> Optional[str]:
	    """Return the enrolled uid most similar to the audio at *path*.

	    Returns None when nobody is enrolled or the best cosine similarity is
	    below *threshold*.
	    """
	    voices = _load_json(VOICES_DB, {})
	    if not voices:
	        return None
	    probe = _embed_mfcc(path).reshape(1, -1)
	    uids = list(voices.keys())
	    enrolled = np.array([voices[uid] for uid in uids])
	    sims = cosine_similarity(probe, enrolled)[0]
	    best = int(np.argmax(sims))
	    if sims[best] >= threshold:
	        return uids[best]
	    return None

	# Letter -> teaching hint shown by phonics().
	_BASIC = {
	    'a': 'a as in apple /æ/',
	    'e': 'e as in elephant /ɛ/',
	    'i': 'i as in igloo /ɪ/',
	    'o': 'o as in octopus /ɒ/',
	    'u': 'u as in umbrella /ʌ/',
	    'c': 'c as in cat /k/ (before e/i/y often /s/)',
	    'g': 'g as in goat /g/ (before e/i/y often soft /dʒ/)',
	    'y': 'y as in yellow /j/ or happy /i/',
	}


	def phonics(word: str) -> str:
	    """Return a one-line phonics summary for *word*: its phonemes plus letter hints.

	    Only G2P tokens that look like phoneme symbols (capital letters with an
	    optional stress digit) are kept. Each hint is listed once, in order of
	    first appearance.
	    """
	    phones = [tok for tok in G2P(word) if re.match(r"[A-Z]+[0-2]?$", tok)]
	    hints = []
	    for letter in word.lower():
	        hint = _BASIC.get(letter)
	        if hint is not None and hint not in hints:
	            hints.append(hint)
	    # NOTE(review): "\|" is not a valid escape, so the output contains a
	    # literal backslash before the pipe; confirm that is wanted.
	    return f"Phonemes: {' '.join(phones)} \| Hints: {('; '.join(hints)) if hints else '🐝'}"

	def lid_chunk(text: str, min_len: int = 12) -> List[Tuple[str, str]]:
	    """Split *text* into pieces and tag each with its detected language code.

	    Pieces from the split are accumulated until the buffer reaches
	    *min_len* characters or the current piece starts with sentence
	    punctuation; the buffer is then classified and emitted.
	    """
	    # NOTE(review): this pattern only matches punctuation followed by two or
	    # more whitespace characters AND a literal "|", so ordinary text is
	    # returned as a single piece. It looks unintended; confirm.
	    pieces = re.split(r"([.!?;\u2026\u2028\u2029])+\s{2,}\|", text)
	    chunks = []
	    pending = ""
	    for piece in pieces:
	        if not piece:
	            continue
	        pending += piece
	        if len(pending) < min_len and not re.match(r"[.!?;\u2026\u2028\u2029]", piece):
	            continue
	        segment = pending.strip()
	        lang, _ = langid.classify(segment)
	        chunks.append((segment, lang))
	        pending = ""
	    tail = pending.strip()
	    if tail:
	        lang, _ = langid.classify(tail)
	        chunks.append((tail, lang))
	    return chunks

	def asr_transcribe(path: str, uid: Optional[str], forced_lang: Optional[str] = None) -> str:
	    """Transcribe the audio at *path* and return the text.

	    The language hint is *forced_lang*, else the language remembered for
	    *uid* ("guest" when uid is empty), else auto-detect. Unless a language
	    was forced, the language detected in a non-empty transcript is saved as
	    the new prior for that profile.
	    """
	    model = get_asr()
	    profile_key = uid or "guest"
	    prior = _load_json(ADAPT_DB, {}).get(profile_key, {}).get("lang_prior")
	    language = forced_lang or prior or None
	    segs, _info = model.transcribe(path, language=language, beam_size=5, vad_filter=True)
	    text = " ".join([s.text for s in segs]) if segs else ""
	    if forced_lang or not text.strip():
	        return text
	    detected, _ = langid.classify(text)
	    profiles = _load_json(ADAPT_DB, {})
	    entry = profiles.get(profile_key, {})
	    entry["lang_prior"] = detected
	    profiles[profile_key] = entry
	    _save_json(ADAPT_DB, profiles)
	    return text

	def synthesize_multilang(text: str, fallback="en") -> str:
	    """Speak *text* chunk by chunk in each chunk's language and return the WAV path.

	    Chunks whose language has no Piper voice use *fallback*. The output
	    file gets a unique name in the temp directory. Before, the name was
	    derived from the current second, so two requests in the same second
	    overwrote each other's audio.
	    """
	    chunks = lid_chunk(text)
	    sr = None
	    mix = None
	    for ch, lg in chunks or [(text, fallback)]:
	        lg2 = lg if lg in PIPER_MODELS else fallback
	        v = get_tts(lg2)
	        aud, _ = v.synthesize(ch)
	        # NOTE(review): only the first voice's sample rate is used and audio
	        # is not resampled; confirm all configured voices share one rate.
	        if sr is None:
	            sr = v.sample_rate
	        mix = aud if mix is None else np.concatenate([mix, aud])
	    fd, outp = tempfile.mkstemp(prefix="hive_tts_", suffix=".wav")
	    os.close(fd)  # soundfile reopens the path itself
	    sf.write(outp, mix if mix is not None else np.zeros(1), sr or 22050, subtype="PCM_16")
	    return outp

	# ----------- compiler / engine -----------
	class OCRagRanker:
	    """Rank candidate snippets by keyword overlap with the query."""

	    def execute(self, query, candidates):
	        """Return *candidates* sorted best-first.

	        Each query word found as a substring of the snippet text adds 2
	        points; text length adds up to 1 more point, saturating at 300
	        characters. Ties keep their input order.
	        """
	        terms = set(re.findall(r"\w+", query.lower()))

	        def relevance(candidate):
	            body = (candidate.get("text", "") or "").lower()
	            hits = len([term for term in terms if term in body])
	            return hits * 2 + min(len(body), 300) / 300.0

	        ranked = list(candidates)
	        ranked.sort(key=relevance, reverse=True)
	        return ranked
	class OCPromptMinimizer:
	    """Pick snippet texts that fit inside a character budget."""

	    def execute(self, snippets, budget):
	        """Return snippet texts, each cut to 300 characters, whose total length stays within *budget*.

	        A snippet that does not fit is skipped; later, shorter snippets may
	        still be taken.
	        """
	        kept = []
	        used = 0
	        for snippet in snippets:
	            piece = (snippet.get("text", "") or "")[:300]
	            if used + len(piece) > budget:
	                continue
	            kept.append(piece)
	            used += len(piece)
	        return kept
	# Registry of shared helper instances, looked up by key in PromptCompiler.compile().
	OC_REG={"rag_ranker": OCRagRanker(), "prompt_minimizer": OCPromptMinimizer()}

	class PromptCompiler:
	    """Build the final LLM prompt from the user message and retrieved snippets."""

	    def __init__(self):
	        # Both may be set at runtime by the overlay; None means "use defaults".
	        self.override_head = None
	        self.override_budget = None

	    def compile(self, user_msg, snippets, token_budget=600):
	        """Return the prompt: a head line, a bullet list of chosen snippets, then the user turn.

	        Snippets are ranked against the message and trimmed to a budget of
	        ``max(200, budget // 3)``. A truthy ``override_budget`` replaces
	        *token_budget*.
	        """
	        budget = self.override_budget or token_budget
	        ranked = OC_REG["rag_ranker"].execute(user_msg, snippets)
	        chosen = OC_REG["prompt_minimizer"].execute(ranked, budget=max(200, budget // 3))
	        if isinstance(self.override_head, str):
	            head = self.override_head
	        else:
	            head = "Use the brief, relevant facts below.\n"
	        bullets = "\n".join(f"- {t}" for t in chosen)
	        return f"{head}{bullets}\n\nUser: {user_msg}\nAssistant:"

	class EngineCurve:
	    """Route messages with configurable regex rules and keep simple run statistics."""

	    def __init__(self):
	        self.stats = {"runs": 0, "ok": 0, "latency_ms": []}
	        self.router_rules = []

	    def choose_route(self, msg: str) -> str:
	        """Return "translation", "vision" or "general" for *msg*.

	        The first compiled rule that matches *msg* and whose pattern text
	        contains one of the route names decides; entries that are not
	        compiled patterns are ignored.
	        """
	        for rule in self.router_rules or []:
	            if not isinstance(rule, re.Pattern):
	                continue
	            if not rule.search(msg):
	                continue
	            source = rule.pattern.lower()
	            for route in ("translation", "vision"):
	                if route in source:
	                    return route
	        return "general"

	    def run(self, message: str, snippets: List[Dict]) -> Dict:
	        """Route *message*, record the routing latency and return ``{"ok": True, "route": ...}``."""
	        started = time.time()
	        route = self.choose_route(message)
	        elapsed_ms = int((time.time() - started) * 1000)
	        stats = self.stats
	        stats["runs"] += 1
	        stats["ok"] += 1
	        stats["latency_ms"].append(elapsed_ms)
	        return {"ok": True, "route": route}

	# ----------- wifi auto-connect (non-blocking) -----------
	# JSON list of known Wi-Fi networks ({"ssid": ..., "priority": ...}) read by _load_known().
	NET_STATE_DB=os.path.join(CFG["STATE_DIR"],"wifi_known.json")

	def _os_name(): return platform.system().lower()
	def _fast_probe(host="8.8.8.8", port=53, timeout=1.5)->bool:
	try:
	socket.setdefaulttimeout(timeout)
	s=socket.socket(socket.AF_INET, socket.SOCK_STREAM); s.connect((host,port)); s.close()
	return True
	except Exception:
	return False
	def _http_probe(url="https://huggingface.co", timeout=2.5)->float:
	try:
	t0=time.time(); r=requests.head(url, timeout=timeout)
	if r.status_code<500: return (time.time()-t0)*1000.0
	except Exception: pass
	return -1.0
	def _load_known() -> List[dict]:
	    """Return the known Wi-Fi networks as ``{"ssid", "priority"}`` dicts, highest priority first.

	    Entries that are not dicts or have no ``ssid`` are ignored.
	    """
	    known = [
	        {"ssid": entry["ssid"], "priority": int(entry.get("priority", 0))}
	        for entry in _load_json(NET_STATE_DB, [])
	        if isinstance(entry, dict) and "ssid" in entry
	    ]
	    known.sort(key=lambda net: net.get("priority", 0), reverse=True)
	    return known
	def _get_saved_password(ssid: str) -> Optional[str]:
	    """Look up the Wi-Fi password for *ssid* in the system keyring.

	    Returns "" when the keyring holds no entry, and None when keyring is
	    unavailable or the lookup fails.
	    """
	    if not keyring:
	        return None
	    try:
	        stored = keyring.get_password("hive_wifi", ssid)
	    except Exception:
	        return None
	    return stored or ""
	def _connect_linux(ssid, password, timeout=12) -> Tuple[bool, str]:
	    """Join *ssid* through ``nmcli`` and return ``(ok, tool output)``."""
	    command = ["nmcli", "device", "wifi", "connect", ssid]
	    if password:
	        command += ["password", password]
	    try:
	        proc = subprocess.run(command, capture_output=True, text=True, timeout=timeout)
	    except Exception as e:
	        return False, f"nmcli error: {e}"
	    return (proc.returncode == 0), (proc.stdout or proc.stderr or "").strip()
	def _connect_windows(ssid, password) -> Tuple[bool, str]:
	    """Join *ssid* through ``netsh`` and return ``(ok, message)``.

	    An existing profile is tried first. If that fails and a password is
	    available, a WPA2-PSK profile is registered and the connect is retried.
	    SSID and password are XML-escaped, and the temporary profile file,
	    which holds the plaintext password, is always deleted.
	    """
	    from xml.sax.saxutils import escape

	    connect_cmd = ["netsh", "wlan", "connect", "name=" + ssid, "ssid=" + ssid]
	    tmp = None
	    try:
	        p = subprocess.run(connect_cmd, capture_output=True, text=True)
	        if p.returncode == 0 and "success" in (p.stdout + p.stderr).lower():
	            return True, "Connected."
	        if not password:
	            return False, "No saved password."
	        safe_ssid = escape(ssid)
	        safe_password = escape(password)
	        xml = "\n".join([
	            '<?xml version="1.0"?>',
	            '<WLANProfile xmlns="http://www.microsoft.com/networking/WLAN/profile/v1">',
	            f'<name>{safe_ssid}</name><SSIDConfig><SSID><name>{safe_ssid}</name></SSID></SSIDConfig>',
	            '<connectionType>ESS</connectionType><connectionMode>auto</connectionMode>',
	            '<MSM><security><authEncryption><authentication>WPA2PSK</authentication>',
	            '<encryption>AES</encryption><useOneX>false</useOneX></authEncryption>',
	            '<sharedKey><keyType>passPhrase</keyType><protected>false</protected>',
	            f'<keyMaterial>{safe_password}</keyMaterial></sharedKey></security></MSM></WLANProfile>',
	        ])
	        fd, tmp = tempfile.mkstemp(prefix="wifi_", suffix=".xml")
	        with os.fdopen(fd, "w", encoding="utf-8") as f:
	            f.write(xml)
	        a = subprocess.run(
	            ["netsh", "wlan", "add", "profile", "filename=" + tmp, "user=all"],
	            capture_output=True, text=True,
	        )
	        if a.returncode != 0:
	            return False, a.stderr or a.stdout or "add profile failed"
	        c = subprocess.run(connect_cmd, capture_output=True, text=True)
	        return (c.returncode == 0), (c.stderr or c.stdout or "").strip()
	    except Exception as e:
	        return False, f"netsh error: {e}"
	    finally:
	        if tmp:
	            try:
	                os.remove(tmp)
	            except OSError:
	                pass
	def _connect_macos(ssid, password) -> Tuple[bool, str]:
	    """Join *ssid* through ``networksetup`` and return ``(ok, tool output)``.

	    The Wi-Fi device is the first "Device:" line in a hardware-port block
	    that mentions Wi-Fi or AirPort.
	    """
	    try:
	        listing = subprocess.check_output(
	            ["networksetup", "-listallhardwaresports"], stderr=subprocess.DEVNULL
	        ).decode("utf-8", "ignore")
	        dev = None
	        for block in listing.split("\n\n"):
	            if "Wi-Fi" not in block and "AirPort" not in block:
	                continue
	            for line in block.splitlines():
	                if line.strip().startswith("Device:"):
	                    dev = line.split(":", 1)[1].strip()
	                    break
	            if dev:
	                break
	        if not dev:
	            return False, "Wi-Fi device not found"
	        # NOTE(review): the password is passed as a command-line argument.
	        command = ["networksetup", "-setairportnetwork", dev, ssid]
	        if password:
	            command.append(password)
	        proc = subprocess.run(command, capture_output=True, text=True)
	        return (proc.returncode == 0), (proc.stderr or proc.stdout or "").strip()
	    except Exception as e:
	        return False, f"networksetup error: {e}"
	def _connect_os(ssid, password, timeout=12) -> Tuple[bool, str]:
	    """Dispatch the Wi-Fi connect to the handler for the current OS.

	    Only the Linux handler receives *timeout*. Unknown platforms yield
	    ``(False, "Unsupported OS: ...")``.
	    """
	    osn = _os_name()
	    connectors = {
	        "linux": lambda: _connect_linux(ssid, password, timeout),
	        "windows": lambda: _connect_windows(ssid, password),
	        "darwin": lambda: _connect_macos(ssid, password),
	    }
	    connect = connectors.get(osn)
	    if connect is None:
	        return False, f"Unsupported OS: {osn}"
	    return connect()

	class AutoConnector:
	    """Reconnect to a known Wi-Fi network in the background when offline."""

	    def __init__(self):
	        self.last_attempt = 0.0       # time.time() of the last kick that was let through
	        self.cooldown_s = 30.0        # minimum spacing between attempts
	        self.per_ssid_timeout = 10.0  # seconds given to each network
	        self.total_budget_s = 18.0    # seconds given to one whole attempt
	        self.thread = None
	        self._lock = threading.Lock()

	    def online_quick(self) -> bool:
	        """Fast connectivity check (TCP probe)."""
	        return _fast_probe(timeout=1.2)

	    def quality_ms(self) -> float:
	        """HTTP round-trip time in ms, or -1.0 when unreachable."""
	        return _http_probe(timeout=2.0)

	    def _run_once(self):
	        """Try known networks in priority order until online or the time budget is spent."""
	        if self.online_quick():
	            return
	        known = _load_known()
	        if not known:
	            return
	        t_start = time.time()
	        for item in known:
	            if time.time() - t_start > self.total_budget_s:
	                return
	            ssid = item["ssid"]
	            pw = _get_saved_password(ssid)
	            ok, _msg = _connect_os(ssid, pw, timeout=int(self.per_ssid_timeout))
	            if ok and self.online_quick():
	                return

	    def kick_async(self):
	        """Start one background reconnect attempt unless one ran recently or is still running."""
	        with self._lock:
	            now = time.time()
	            if now - self.last_attempt < self.cooldown_s:
	                return
	            self.last_attempt = now
	            if self.thread and self.thread.is_alive():
	                return
	            # The worker is _run_once; the former target "run_once" does not
	            # exist and raised AttributeError on every kick.
	            self.thread = threading.Thread(target=self._run_once, daemon=True)
	            self.thread.start()


	NET = AutoConnector()

	# ----------- coverage heuristic -----------
	def coverage_score_from_snippets(snippets: list, scores: list) -> float:
	    """Estimate in [0, 1] how well retrieved snippets cover a query.

	    The mean of the three best scores, plus 0.03 per snippet capped at
	    0.15, clamped to [0, 1]. Returns 0.0 when either list is empty.
	    """
	    if not (snippets and scores):
	        return 0.0
	    ranked = sorted(scores, reverse=True)
	    top = ranked[:3]
	    base = sum(top) / len(top) if top else 0.0
	    bonus = min(0.15, 0.03 * len(snippets))
	    clamped = min(1.0, base + bonus)
	    return float(max(0.0, clamped))

	# ----------- overlay / hotpatch -----------
	# The only override keys RuntimeOverlay.patch() will accept.
	ALLOWED_PATCH_KEYS={"prompt_head","retrieval_k","token_budget","temperature","router_rules","web_threshold"}
	def _load_overrides():
	    """Return the persisted runtime overrides, or ``{}`` if the file is missing or unreadable."""
	    try:
	        with open(RUNTIME_OVERRIDES, "r", encoding="utf-8") as f:
	            return json.load(f)
	    except Exception:
	        # Missing or corrupt overrides simply mean "no overrides".
	        return {}
	def _save_overrides(ovr: dict):
	    """Persist *ovr* as indented JSON, closing the file handle deterministically."""
	    with open(RUNTIME_OVERRIDES, "w", encoding="utf-8") as f:
	        json.dump(ovr, f, indent=2)

	class RuntimeOverlay:
	    """Persisted runtime overrides that can be applied to a running Hive."""

	    def __init__(self):
	        self.ovr = _load_overrides()

	    def apply_to(self, hive: "Hive"):
	        """Push the current overrides onto *hive*, clamping numbers to safe ranges.

	        Unset values fall back to retrieval_k=6, temperature=0.7 and
	        web_threshold=0.40. Router rules are compiled case-insensitively;
	        one invalid pattern discards the whole rule list.
	        """
	        o = self.ovr or {}
	        if isinstance(o.get("prompt_head"), str):
	            hive.compiler.override_head = o["prompt_head"]
	        if isinstance(o.get("token_budget"), int):
	            hive.compiler.override_budget = max(256, min(8192, o["token_budget"]))
	        hive.retrieval_k = max(3, min(24, int(o.get("retrieval_k", 6))))
	        hive.decoding_temperature = max(0.0, min(1.5, float(o.get("temperature", 0.7))))
	        rr = o.get("router_rules") or []
	        if isinstance(rr, list):
	            try:
	                hive.engine.router_rules = [
	                    re.compile(pat, re.I) for pat in rr if isinstance(pat, str) and pat
	                ]
	            except re.error:
	                hive.engine.router_rules = []
	        t = o.get("web_threshold", None)
	        hive.web_threshold = float(t) if isinstance(t, (int, float)) else 0.40

	    def patch(self, patch: dict, actor_role: str = "hive") -> Tuple[bool, str]:
	        """Merge the allowed keys of *patch* into the overrides and persist them.

	        Returns ``(ok, message)``. The caller's dict is no longer modified;
	        disallowed keys used to be popped from it in place.
	        """
	        if not CFG["ALLOW_RUNTIME_HOTPATCH"]:
	            return False, "Runtime hotpatch disabled."
	        if actor_role not in ("hive", "admin_general", "admin_super", "owner"):
	            return False, "Unauthorized actor."
	        accepted = {k: v for k, v in patch.items() if k in ALLOWED_PATCH_KEYS}
	        if not accepted:
	            return False, "No allowed keys."
	        self.ovr.update(accepted)
	        _save_overrides(self.ovr)
	        return True, "Patched."

	# ----------- safe reboot -----------
	def _persist_before_reboot():
	    """Write a small marker file (STATE_DIR/last_reboot.json) before a self-reboot.

	    Best-effort: any failure is ignored so it cannot block the reboot. The
	    file is closed explicitly because the process image is replaced right
	    afterwards and unflushed buffers would be lost.
	    """
	    try:
	        marker = os.path.join(CFG["STATE_DIR"], "last_reboot.json")
	        with open(marker, "w", encoding="utf-8") as fh:
	            json.dump({"ts": time.time(), "note": "self-reboot"}, fh)
	    except Exception:
	        pass
	def safe_reboot(reason:str="optimization"):
	    """Re-exec the current script in place, if CFG["ALLOW_SELF_REBOOT"] permits.

	    Returns (False, message) when self-reboot is disabled. Otherwise the
	    process image is replaced via os.execv and this function does not return;
	    if the exec itself fails the process exits with status 3.
	    """
	    if not CFG["ALLOW_SELF_REBOOT"]:
	        return False, "Self-reboot disabled."
	    _persist_before_reboot()
	    # exec* replaces the process without flushing open file objects, so flush
	    # the standard streams first or buffered output is lost.
	    for stream in (sys.stdout, sys.stderr):
	        try:
	            stream.flush()
	        except Exception:
	            pass
	    try:
	        os.execv(sys.executable, [sys.executable, os.path.abspath(__file__)] + sys.argv[1:])
	    except Exception:
	        os._exit(3)
	    # Not reached in practice (exec replaces the process, failure exits);
	    # kept so the declared (ok, message) shape holds on every path.
	    return True, f"Rebooting: {reason}"

	# ----------- self optimizer (bounded) -----------
	class SelfOptimizer(threading.Thread):
	    """Daemon thread that periodically adjusts runtime overrides under memory or latency pressure."""

	    def __init__(self, hive: "Hive"):
	        super().__init__(daemon=True)
	        self.hive = hive
	        self.stop = False  # set to True to leave the loop after the current sleep
	        self.tick = 45.0   # seconds slept between two checks

	    def run(self):
	        """Loop until `stop` is set: sleep, measure, patch overrides, optionally reboot."""
	        while not self.stop:
	            time.sleep(self.tick)
	            if not CFG["AUTO_SELF_OPTIMIZE"]:
	                continue
	            mem = psutil.virtual_memory()
	            changes = {}
	            if mem.percent > 88:
	                # High memory use: shrink the token budget to 75% (floor 512) and cool the sampling.
	                current_budget = self.hive.compiler.override_budget or CFG["CTX_TOKENS"]
	                changes["token_budget"] = max(512, int(0.75 * current_budget))
	                changes["temperature"] = max(0.2, self.hive.decoding_temperature - 0.1)
	            if self.hive.engine.stats["latency_ms"]:
	                recent = self.hive.engine.stats["latency_ms"][-10:]
	                avg_latency = sum(recent) / max(1, len(recent))
	            else:
	                avg_latency = 0
	            if avg_latency > 1200:
	                # Average of the last ten latencies is above 1200 ms: retrieve one snippet fewer (floor 3).
	                changes["retrieval_k"] = max(3, self.hive.retrieval_k - 1)
	            if changes:
	                patched, _msg = self.hive.overlay.patch(changes, actor_role="hive")
	                if patched:
	                    self.hive.overlay.apply_to(self.hive)
	            if CFG["ALLOW_SELF_REBOOT"] and mem.percent > 94:
	                safe_reboot("refresh memory")

	# ----------- internal optimization stack -----------
	def _append_jsonl(path, rec):
	with open(path, "a", encoding="utf-8") as f:
	f.write(json.dumps(rec, ensure_ascii=False) + "\n")

	@dataclass
	class ChangeProposal:
	    """A proposed change to the running system, as recorded by the Change Manager."""

	    kind: str  # "model" | "package" | "code"
	    name: str  # model id / package name / file target
	    version: str = ""
	    patch_text: str = ""  # for "code": the replacement source text
	    reason: str = ""
	    # 0.0 means "not set": __post_init__ stamps the creation time per instance.
	    # (A `time.time()` default would be evaluated once, at class definition.)
	    created_ts: float = 0.0
	    proposer: str = "hive"
	    id: str = ""

	    def __post_init__(self):
	        if not self.created_ts:
	            self.created_ts = time.time()

	class Sandbox:
	    """Throw-away virtualenv under OPT_DIR used for trial installs and smoke-test snippets."""

	    def __init__(self):
	        os.makedirs(OPT_DIR, exist_ok=True)
	        # mkdtemp guarantees a fresh directory even when two sandboxes are
	        # created within the same second.
	        self.root = tempfile.mkdtemp(prefix=f"sandbox_{int(time.time())}_", dir=OPT_DIR)
	        self.venv = os.path.join(self.root, "venv")

	    def _python(self):
	        """Return the path of the sandbox's Python interpreter for this platform."""
	        if os.name != "nt":
	            return os.path.join(self.venv, "bin", "python")
	        return os.path.join(self.venv, "Scripts", "python.exe")

	    def _run(self, args, timeout):
	        """Run `args` and return (returncode, stdout + stderr). Raises subprocess.TimeoutExpired on timeout."""
	        p = subprocess.run(args, capture_output=True, text=True, timeout=timeout)
	        return p.returncode, (p.stdout or "") + (p.stderr or "")

	    def create(self):
	        """Create the virtualenv; raises RuntimeError with the tool output on failure."""
	        rc, out = self._run([sys.executable, "-m", "venv", self.venv], timeout=120)
	        if rc != 0:
	            raise RuntimeError("venv create failed: " + out)

	    def pip(self, pkg_spec):
	        """Install/upgrade `pkg_spec` inside the sandbox; raises RuntimeError on failure."""
	        rc, out = self._run([self._python(), "-m", "pip", "install", "--upgrade", pkg_spec],
	                            timeout=CFG["OPT_SANDBOX_TIMEOUT"])
	        if rc != 0:
	            raise RuntimeError("pip install failed: " + out)

	    def run_snippet(self, code: str):
	        """Write `code` to snippet.py in the sandbox and run it; returns (returncode, output)."""
	        tmp = os.path.join(self.root, "snippet.py")
	        # Close the file before the child process reads it.
	        with open(tmp, "w", encoding="utf-8") as fh:
	            fh.write(code)
	        return self._run([self._python(), tmp], timeout=CFG["OPT_SANDBOX_TIMEOUT"])

	def _synthetic_eval(hive_factory, prompts: List[str]) -> Dict:
	lat_ms=[]; toks_s=[]; quality=0.0
	for p in prompts:
	t0=time.time()
	h=hive_factory()
	out=h.pipe(h.compiler.compile(p, []), max_new_tokens=64, do_sample=False, temperature=0.2) # type: ignore
	t1=time.time()
	text=out[0]["generated_text"]
	lat_ms.append((t1-t0)*1000)
	toks=max(1,len(text.split())); toks_s.append(toks/max(0.001,(t1-t0)))
	q=sum(1 for w in set(re.findall(r"\w+", p.lower())) if w in text.lower())/max(1,len(set(re.findall(r"\w+", p.lower()))))
	quality+=q
	n=max(1,len(prompts))
	return {"lat_ms":sum(lat_ms)/n, "toks_s":sum(toks_s)/n, "quality":quality/n}

	class ChangeManager:
	    """Propose, evaluate and apply model / package / code changes under allowlist policy."""

	    def __init__(self, hive_cls):
	        self.hive_cls = hive_cls

	    @staticmethod
	    def _canonical_pkg(name):
	        """Return the normalized form of a bare distribution name, or "" if it is not a bare name.

	        Anything carrying a version specifier, extras, URL or whitespace is
	        rejected; the version travels in the proposal's separate `version` field.
	        """
	        text = str(name or "").strip()
	        if not re.fullmatch(r"[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?", text):
	            return ""
	        return re.sub(r"[-_.]+", "-", text).lower()

	    def _allowed_pkg(self, name):
	        """True when `name` is exactly one of the allowlisted packages.

	        An exact (normalized) match is required: a prefix match would also
	        accept e.g. "numpy-evil" for an allowlisted "numpy".
	        """
	        wanted = self._canonical_pkg(name)
	        if not wanted:
	            return False
	        for allow in CFG["OPT_PKG_ALLOWLIST"]:
	            # Allowlist entries may carry a specifier or extras; compare the name part only.
	            base = re.split(r"[\s\[<>=!~;@]", str(allow).strip(), maxsplit=1)[0]
	            if base and self._canonical_pkg(base) == wanted:
	                return True
	        return False

	    def _allowed_model(self, mid):
	        """True when the model id is listed in CFG["OPT_MODEL_ALLOWLIST"]."""
	        return mid in CFG["OPT_MODEL_ALLOWLIST"]

	    def propose(self, cp: "ChangeProposal") -> str:
	        """Assign an id to the proposal, append it to OPT_PROPOSALS and return the id."""
	        cp.id = f"chg_{int(time.time())}_{abs(hash(cp.name))%100000}"
	        _append_jsonl(OPT_PROPOSALS, cp.__dict__)
	        return cp.id

	    def test_and_compare(self, cp_id: str, proposal: "ChangeProposal") -> Dict:
	        """Evaluate a proposal against the current baseline.

	        Returns {"ok": False, "reason": ...} when the proposal is refused or the
	        sandbox step fails; otherwise a result dict with base/cand metrics,
	        their delta and a `passed` flag, which is also appended to OPT_RESULTS.
	        """
	        # Policy checks first: they are cheap, and the baseline run below loads a model.
	        if proposal.kind == "package" and not self._allowed_pkg(proposal.name):
	            return {"ok": False, "reason": "package not allowlisted"}
	        if proposal.kind == "model" and not self._allowed_model(proposal.name):
	            return {"ok": False, "reason": "model not allowlisted"}

	        def base_hive():
	            return self.hive_cls(model_id=None)

	        prompts = ["Summarize the water cycle.","Translate to French: the quick brown fox jumps over the lazy dog.","Two-sentence difference between TCP and UDP."]
	        base = _synthetic_eval(base_hive, prompts)
	        model_override = None
	        try:
	            sand = Sandbox()
	            sand.create()
	            if proposal.kind == "package":
	                spec = proposal.name + (("==" + proposal.version) if proposal.version else "")
	                sand.pip(spec)
	            elif proposal.kind == "model":
	                model_override = proposal.name
	            elif proposal.kind == "code":
	                target = os.path.basename(__file__)
	                patched = os.path.join(sand.root, target)
	                with open(patched, "w", encoding="utf-8") as f:
	                    f.write(proposal.patch_text or "")
	                # Smoke test: import the patched module in the sandbox interpreter and construct a Hive.
	                code = f"import importlib.util, json; p=r'{patched}'; spec=importlib.util.spec_from_file_location('hmod',p); m=importlib.util.module_from_spec(spec); spec.loader.exec_module(m); h=m.Hive(); print(json.dumps({{'ok':True}}))"
	                rc, out = sand.run_snippet(code)
	                if rc != 0 or '"ok": true' not in out.lower():
	                    return {"ok": False, "reason": "patch smoke test failed", "out": out}
	        except Exception as e:
	            return {"ok": False, "reason": f"sandbox failed: {e}"}

	        def cand_hive():
	            return self.hive_cls(model_id=model_override) if model_override else self.hive_cls(model_id=None)

	        cand = _synthetic_eval(cand_hive, prompts)
	        # Positive delta means "candidate is better" for all three metrics.
	        delta = {"lat_ms": base["lat_ms"] - cand["lat_ms"], "toks_s": cand["toks_s"] - base["toks_s"], "quality": cand["quality"] - base["quality"]}
	        passed = True
	        if CFG["OPT_THRESH_LATENCY_MS"] > 0 and delta["lat_ms"] < CFG["OPT_THRESH_LATENCY_MS"]:
	            passed = False
	        if CFG["OPT_THRESH_TOKS_PER_S"] > 0 and delta["toks_s"] < CFG["OPT_THRESH_TOKS_PER_S"]:
	            passed = False
	        if delta["quality"] < CFG["OPT_THRESH_QUALITY"]:
	            passed = False
	        result = {"ok": True, "proposal": proposal.__dict__, "base": base, "cand": cand, "delta": delta, "passed": passed}
	        _append_jsonl(OPT_RESULTS, result)
	        return result

	    def apply(self, result: Dict) -> Tuple[bool, str]:
	        """Apply a change whose evaluation passed. Returns (ok, message)."""
	        prop = result.get("proposal", {})
	        kind = prop.get("kind")
	        name = prop.get("name", "")
	        if not result.get("passed"):
	            return False, "did not meet thresholds"
	        if kind == "package":
	            if not self._allowed_pkg(name):
	                return False, "package not allowlisted"
	            try:
	                subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", name + (("==" + prop.get("version", "")) if prop.get("version") else "")])
	                return True, "package installed"
	            except Exception as e:
	                return False, f"pip failed: {e}"
	        if kind == "model":
	            if not self._allowed_model(name):
	                return False, "model not allowlisted"
	            pref = os.path.join(OPT_DIR, "preferred_model.json")
	            with open(pref, "w", encoding="utf-8") as fh:
	                json.dump({"model_id": name, "ts": time.time()}, fh)
	            return True, "model preference recorded (takes effect after restart)"
	        if kind == "code":
	            if not CFG["OPT_AUTO_APPLY"]:
	                return False, "awaiting Owner approval for code changes"
	            new_source = prop.get("patch_text", "")
	            if not new_source:
	                # patch_text replaces the whole file; an empty patch would blank the program.
	                return False, "code write failed: empty patch_text"
	            try:
	                target = os.path.abspath(__file__)
	                backup = target + f".bak_{int(time.time())}"
	                shutil.copyfile(target, backup)
	                with open(target, "w", encoding="utf-8") as fh:
	                    fh.write(new_source)
	                return True, "code updated (backup created); restart recommended"
	            except Exception as e:
	                return False, f"code write failed: {e}"
	        return False, "unknown change type"

	# ----------- Hive core -----------
	# --- Memory & Manifest Helpers (auto-inserted) ---
	import tempfile, urllib.request, tarfile, zipfile
	from pathlib import Path as _Path

	def _human_ts(ts: int) -> str:
	import datetime
	try: # type: ignore
	return datetime.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S UTC")
	except Exception:
	return str(ts)

	# JSON file in the state directory (default "./state") that _load_progress/_save_progress read and write.
	INGEST_PROGRESS = os.path.join(CFG.get("STATE_DIR","./state"), "ingest_progress.json")

	def _load_progress():
	    """Load the staged-ingestion progress record from INGEST_PROGRESS.

	    Always returns a dict containing "done" (list), "stage" and "ts"; keys
	    missing from the stored file are filled with their defaults so callers
	    can index them directly. A missing, unreadable or non-object file yields
	    the default record.
	    """
	    defaults = {"done": [], "stage": 0, "ts": 0}
	    try:
	        if os.path.exists(INGEST_PROGRESS):
	            with open(INGEST_PROGRESS, "r", encoding="utf-8") as fh:
	                stored = json.load(fh)
	            if isinstance(stored, dict):
	                merged = {**defaults, **stored}
	                if not isinstance(merged["done"], list):
	                    merged["done"] = []
	                return merged
	    except Exception:
	        pass
	    return defaults

	def _save_progress(p):
	    """Write the progress record `p` to INGEST_PROGRESS as indented JSON.

	    Best-effort by design: failures are ignored so that bookkeeping problems
	    never abort an ingestion run.
	    """
	    try:
	        with open(INGEST_PROGRESS, "w", encoding="utf-8") as fh:
	            json.dump(p, fh, indent=2)
	    except Exception:
	        pass

	def update_self_manifest(datasets_done: list, vectors_total: int):
	    """Regenerate the MEMORY_MANIFEST block inside this script's own source file.

	    The text between the BEGIN/END manifest markers is replaced with a fresh
	    block built from `datasets_done` (de-duplicated, sorted) and
	    `vectors_total`. The new source is written to "<target>.tmp" and then
	    moved over the target. Returns (ok, message).
	    """
	    if not CFG.get("HIVE_ALLOW_SELF_WRITE_MANIFEST", True):
	        return False, "self-write disabled"

	    target = CFG.get("HIVE_SELF_WRITE_FILE") or os.path.abspath(__file__)
	    try:
	        with open(target, "r", encoding="utf-8") as reader:
	            src = reader.read()
	    except Exception as read_exc:
	        return False, f"read error: {read_exc}"

	    start_tag = "# --- BEGIN MEMORY MANIFEST (auto-updated) ---"
	    end_tag = "# --- END MEMORY MANIFEST ---"
	    markers_found = start_tag in src and end_tag in src
	    if not markers_found:
	        return False, "manifest markers not found"

	    # Keep everything before the start marker and everything after the end marker.
	    head, _sep, rest = src.partition(start_tag)
	    _old_block, tail = rest.split(end_tag, 1)

	    payload = {
	        "updated_ts": int(time.time()),
	        "datasets_done": sorted(set(datasets_done)),
	        "vectors_total": int(vectors_total),
	        "notes": "Set HIVE_ALLOW_SELF_WRITE_MANIFEST=0 to stop auto-updates."
	    }

	    block = "\n".join([
	        start_tag,
	        "# (This block is auto-written by Hive to record what datasets/files",
	        "# have already been converted into memory (curves). Do not edit by hand.)",
	        "MEMORY_MANIFEST = " + json.dumps(payload, indent=4, ensure_ascii=False),
	        end_tag,
	    ])

	    tmp = target + ".tmp"
	    try:
	        with open(tmp, "w", encoding="utf-8") as writer:
	            writer.write(head + block + tail)
	        os.replace(tmp, target)
	    except Exception as write_exc:
	        return False, f"write error: {write_exc}"

	    stamp = _human_ts(payload["updated_ts"])
	    return True, f"manifest updated ({stamp})"

	def _curves_present(curve_dir: str) -> bool:
	idx = os.path.join(curve_dir, "faiss.index")
	meta = os.path.join(curve_dir, "meta.jsonl")
	return os.path.exists(idx) and os.path.getsize(idx) > 0 and os.path.exists(meta)

	def _extract_archive(archive_path: str, dest_dir: str) -> bool:
	os.makedirs(dest_dir, exist_ok=True)
	try:
	if archive_path.endswith(".tar.gz") or archive_path.endswith(".tgz"):
	with tarfile.open(archive_path, "r:gz") as tf:
	tf.extractall(dest_dir)
	return True
	if archive_path.endswith(".zip"):
	with zipfile.ZipFile(archive_path, "r") as z:
	z.extractall(dest_dir)
	return True
	except Exception as e:
	with open(os.path.join(CFG.get("STATE_DIR","./state"), "restore_error.log"), "a", encoding="utf-8") as f: f.write(f"extract: {e}\n")
	return False

	def _restore_from_local_archive(curve_dir: str):
	    """Try to restore memory from the local archive (CFG "CURVES_ARCHIVE_LOCAL", default "curves.tar.gz").

	    Returns (ok, message).
	    """
	    archive = CFG.get("CURVES_ARCHIVE_LOCAL") or "curves.tar.gz"
	    if not (archive and os.path.exists(archive)):
	        return False, "no local archive"
	    extracted = _extract_archive(archive, curve_dir)
	    if extracted:
	        outcome = "restored from local archive"
	    else:
	        outcome = "local extract failed"
	    return (extracted, outcome)

	def _restore_from_url(curve_dir: str):
	    """Download the archive at CFG "CURVES_ARCHIVE_URL" and extract it into `curve_dir`.

	    Returns (ok, message). The download goes to a private temporary file that
	    is always removed afterwards; errors are appended to
	    STATE_DIR/restore_error.log on a best-effort basis.
	    """
	    url = (CFG.get("CURVES_ARCHIVE_URL") or "").strip()
	    if not url:
	        return False, "no URL provided"
	    tmp = None
	    try:
	        # Keep the archive suffix from the URL (query/fragment stripped): a
	        # generic suffix would defeat extension-based format detection.
	        # Unknown suffixes are assumed to be gzip-compressed tarballs.
	        bare = url.split("?", 1)[0].split("#", 1)[0].lower()
	        suffix = next((ext for ext in (".tar.gz", ".tgz", ".zip") if bare.endswith(ext)), ".tar.gz")
	        fd, tmp = tempfile.mkstemp(prefix="curves_", suffix=suffix)
	        os.close(fd)
	        urllib.request.urlretrieve(url, tmp)
	        ok = _extract_archive(tmp, curve_dir)
	        return (ok, "restored from URL" if ok else "URL extract failed")
	    except Exception as e:
	        try:
	            log_path = os.path.join(CFG.get("STATE_DIR","./state"), "restore_error.log")
	            with open(log_path, "a", encoding="utf-8") as fh:
	                fh.write(f"url: {e}\n")
	        except Exception:
	            pass
	        return False, "URL download error"
	    finally:
	        if tmp:
	            try:
	                os.remove(tmp)
	            except OSError:
	                pass

	def _restore_from_hf_dataset(curve_dir: str):
	    """Restore memory from the Hugging Face dataset repo named by CFG "CURVES_HF_DATASET".

	    First tries to download and extract "curves.tar.gz" / "curves.zip"
	    (under CFG "CURVES_HF_SUBPATH" if set). If neither works, downloads a
	    snapshot of the repo and copies the sub-path (or the whole snapshot) into
	    `curve_dir`. Returns (ok, message); unexpected errors are appended to
	    STATE_DIR/restore_error.log on a best-effort basis.
	    """
	    repo_id = (CFG.get("CURVES_HF_DATASET") or "").strip()
	    sub = (CFG.get("CURVES_HF_SUBPATH") or "").strip()
	    if not repo_id:
	        return False, "no dataset repo"
	    try:
	        from huggingface_hub import snapshot_download, hf_hub_download
	        cache = os.path.join("/tmp", "hf_curves_cache")
	        token = CFG.get("HF_READ_TOKEN") or None
	        for fname in ["curves.tar.gz", "curves.zip"]:
	            try:
	                # repo_type must be given explicitly: the hub client defaults to model repos.
	                fp = hf_hub_download(repo_id=repo_id, repo_type="dataset",
	                                     filename=(sub + "/" + fname) if sub else fname,
	                                     token=token, local_dir=cache, local_dir_use_symlinks=False)
	                if _extract_archive(fp, curve_dir):
	                    return True, f"restored from HF dataset file {fname}"
	            except Exception:
	                pass  # this archive name is not available; try the next one / the snapshot

	        # No usable archive file: fall back to a snapshot of the repo
	        # (restricted to the sub-path when one is configured).
	        local_dir = snapshot_download(repo_id=repo_id, repo_type="dataset", token=token, local_dir=cache,
	                                      allow_patterns=[sub.strip("/") + "/*"] if sub else None)
	        src = os.path.join(local_dir, sub) if sub else local_dir
	        if os.path.isdir(src):
	            for root, dirs, files in os.walk(src):
	                rel = os.path.relpath(root, src)
	                dest_root = os.path.join(curve_dir, rel) if rel != "." else curve_dir
	                os.makedirs(dest_root, exist_ok=True)
	                for fn in files:
	                    shutil.copy2(os.path.join(root, fn), os.path.join(dest_root, fn))
	            return True, "restored from HF dataset snapshot"
	        return False, "HF snapshot missing subpath"
	    except Exception as e:
	        try:
	            log_path = os.path.join(CFG.get("STATE_DIR","./state"), "restore_error.log")
	            with open(log_path, "a", encoding="utf-8") as fh:
	                fh.write(f"hf: {e}\n")
	        except Exception:
	            pass
	        return False, "HF restore error"

	def restore_curves_if_missing(curve_dir: str):
	    """Make sure memory files exist in `curve_dir`, restoring them if needed.

	    Sources are tried in order: local archive, URL, Hugging Face dataset. A
	    source counts only if the memory files are present afterwards.
	    Returns (ok, message).
	    """
	    if not CFG.get("HIVE_CURVES_AUTO_RESTORE", True):
	        return False, "auto-restore disabled"
	    if _curves_present(curve_dir):
	        return True, "memory present"
	    restorers = (_restore_from_local_archive, _restore_from_url, _restore_from_hf_dataset)
	    for restore in restorers:
	        succeeded, message = restore(curve_dir)
	        if succeeded and _curves_present(curve_dir):
	            return True, message
	    return False, "no restore source succeeded"
	def _archive_memory(curve_dir: str, archive_path: str=None) -> tuple:
	"""Tar+gzip the memory directory to archive_path (default curves.tar.gz)."""
	try:
	import tarfile, tempfile as _tf
	ap = archive_path or CFG.get("HIVE_ARCHIVE_PATH","curves.tar.gz") or "curves.tar.gz"
	# write to temp then move for atomicity
	tmp = os.path.join(_tf.gettempdir(), f"curves_{int(time.time())}.tar.gz")
	with tarfile.open(tmp, "w:gz") as tar:
	tar.add(curve_dir, arcname="curves")
	os.replace(tmp, ap)
	return True, ap
	except Exception as e:
	try:
	open(os.path.join(CFG["STATE_DIR"], "archive_error.log"), "a", encoding="utf-8").write(str(e)+"\n")
	except Exception:
	pass
	return False, str(e)


	if not CFG.get("CURVES_AUTO_RESTORE", True):
	return False, "auto-restore disabled" # type: ignore
	if _curves_present(curve_dir):
	return True, "curves already present"
	ok, msg = _restore_from_local_archive(curve_dir)
	if ok and _curves_present(curve_dir): return True, msg
	ok, msg = _restore_from_url(curve_dir)
	if ok and _curves_present(curve_dir): return True, msg
	ok, msg = _restore_from_hf_dataset(curve_dir)
	if ok and _curves_present(curve_dir): return True, msg
	return False, "no restore source succeeded"
	# --- End Memory & Manifest Helpers ---


	# --- Staged Ingestion Orchestrator (auto) ---
	def _plan_sources():
	    """Return the ingestion sources: the comma-separated CFG "INGEST_SOURCES", else DEFAULT_SOURCES if defined, else []."""
	    raw = CFG.get("INGEST_SOURCES") or ""
	    configured = []
	    for piece in raw.split(","):
	        if piece.strip():
	            configured.append(piece.strip())
	    if configured:
	        return configured
	    if "DEFAULT_SOURCES" in globals():
	        return DEFAULT_SOURCES
	    return []

	def _next_batch(done: list, all_sources: list, k: int):
	todo = [s for s in all_sources if s not in set(done)]
	return todo[:max(k,0)]

	def staged_ingest_once(curve_dir: str) -> dict:
	    """Ingest a single stage (up to HIVE_INGEST_STAGE_SIZE datasets), respecting the disk floor.

	    Progress is checkpointed after every dataset, so an interruption does not
	    cause finished datasets to be ingested again. When HIVE_AUTO_ARCHIVE is on
	    and HIVE_AUTO_ARCHIVE_MODE is "per_dataset", the memory directory is
	    archived after each dataset. The manifest is updated at the end of the
	    stage. Returns a dict with "ok" plus either the ingested datasets and
	    stage number, a "reason", or an "error".
	    """
	    try:
	        import shutil, time as _t
	        floor = int(CFG.get("HIVE_INGEST_MIN_FREE_GB", 8))
	        free_gb = shutil.disk_usage(".").free / (1024**3)
	        if free_gb < floor:
	            return {"ok": False, "reason": f"free disk {free_gb:.1f} GB < floor {floor} GB"}
	        all_sources = _plan_sources()
	        prog = _load_progress()
	        # The stored record may lack "done" (or hold a non-list); normalize before appending to it.
	        if not isinstance(prog.get("done"), list):
	            prog["done"] = []
	        batch = _next_batch(prog["done"], all_sources, int(CFG.get("HIVE_INGEST_STAGE_SIZE",3)))
	        if not batch:
	            return {"ok": True, "reason": "all sources already ingested", "done": prog["done"]}
	        archive_each = bool(CFG.get("HIVE_AUTO_ARCHIVE", True)) and str(CFG.get("HIVE_AUTO_ARCHIVE_MODE","per_chain")).lower() == "per_dataset"
	        total_added = 0
	        actually_ingested = []
	        for ds in batch:
	            added = ingest_all(curve_dir, [ds], scope="general")
	            total_added += added
	            actually_ingested.append(ds)
	            prog["done"].append(ds)
	            _save_progress(prog)  # checkpoint
	            if archive_each:
	                try:
	                    _ok_arc, _ap = _archive_memory(curve_dir)
	                    with open(os.path.join(CFG["STATE_DIR"], "archive_status.log"), "a", encoding="utf-8") as fh:
	                        fh.write(json.dumps({"ts": _t.time(), "mode": "per_dataset", "ok": _ok_arc, "path": _ap}) + "\n")
	                except Exception as _e_arc:
	                    try:
	                        with open(os.path.join(CFG["STATE_DIR"], "archive_error.log"), "a", encoding="utf-8") as fh:
	                            fh.write("per_dataset: " + str(_e_arc) + "\n")
	                    except Exception:
	                        pass
	            # check disk after each dataset
	            free_gb = shutil.disk_usage(".").free / (1024**3)
	            if free_gb < floor:
	                break
	        prog["stage"] = int(prog.get("stage", 0)) + 1
	        prog["ts"] = int(_t.time())
	        _save_progress(prog)
	        # manifest update (best-effort)
	        try:
	            vecs = 0
	            try:
	                vecs = CurveStore(curve_dir).index.ntotal
	            except Exception:
	                pass
	            update_self_manifest(prog["done"], int(vecs))
	        except Exception:
	            pass
	        return {"ok": True, "ingested": actually_ingested, "added_vectors_est": total_added, "stage": prog["stage"]}
	    except Exception as _e:
	        try:
	            with open(os.path.join(CFG.get("STATE_DIR","./state"), "ingest_error.log"), "a", encoding="utf-8") as fh:
	                fh.write(str(_e)+"\n")
	        except Exception:
	            pass
	        return {"ok": False, "error": str(_e)}

	def staged_ingest_chain_if_enabled(curve_dir: str) -> dict:
	    """Run 0..N stages this boot depending on HIVE_INGEST_CHAIN and HIVE_INGEST_CHAIN_MAX, with safety checks.

	    The chain stops at the first stage that fails, reports that everything is
	    already ingested, or ingests nothing. Afterwards the memory directory is
	    archived if auto-archive is configured for it. Returns
	    {"ok": True, "chain_results": [...]} (or a "staged disabled" record).
	    """
	    if not CFG.get("HIVE_INGEST_STAGED", True):
	        return {"ok": True, "reason": "staged disabled"}
	    results = []
	    if CFG.get("HIVE_INGEST_CHAIN", True):
	        max_stages = max(0, int(CFG.get("HIVE_INGEST_CHAIN_MAX", 2)))
	    else:
	        # Chaining off: run a single stage only when explicitly requested.
	        max_stages = 1 if CFG.get("HIVE_INGEST_NEXT") else 0
	    for _ in range(max_stages):
	        r = staged_ingest_once(curve_dir)
	        results.append(r)
	        if not r.get("ok", False):
	            break
	        if r.get("reason") == "all sources already ingested":
	            break
	        # stop if no items were ingested (e.g., disk floor hit immediately)
	        if not r.get("ingested"):
	            break
	    # auto-archive after chain if configured
	    if CFG.get("HIVE_AUTO_ARCHIVE", True) and str(CFG.get("HIVE_AUTO_ARCHIVE_MODE","per_chain")).lower() in ("per_chain","perdataset","per-dataset"):
	        try:
	            _ok_arc, _ap = _archive_memory(curve_dir)
	            with open(os.path.join(CFG["STATE_DIR"], "archive_status.log"), "a", encoding="utf-8") as fh:
	                fh.write(json.dumps({"ts":time.time(),"mode":"per_chain","ok":_ok_arc,"path":_ap})+"\n")
	        except Exception as _e_arc:
	            try:
	                with open(os.path.join(CFG["STATE_DIR"], "archive_error.log"), "a", encoding="utf-8") as fh:
	                    fh.write("per_chain: "+str(_e_arc)+"\n")
	            except Exception:
	                pass  # an unwritable log must not turn a finished chain into a failure

	    return {"ok": True, "chain_results": results}
	# --- End Staged Ingestion Orchestrator ---


	class Hive:
	    """Core assistant: curve memory, prompt compiler and a text-generation backend (local or remote)."""

	    # Compiled once; applied to replies when CFG["NO_PROFANITY"] is set.
	    _PROFANITY_RE = re.compile(r"\b(fuck|shit|bitch|asshole|cunt|dick|pussy|nigger|motherfucker)\b", re.I)

	    @staticmethod
	    def _append_log(path: str, text: str) -> None:
	        """Append `text` to the log file at `path`, closing the handle deterministically."""
	        with open(path, "a", encoding="utf-8") as fh:
	            fh.write(text)

	    def __init__(self, model_id: Optional[str]=None, device: Optional[str]=None):
	        """Restore/ingest memory, pick and load the model backend, apply overrides, start the optimizer.

	        model_id: explicit model to load; when omitted a model (and device) is
	                  chosen via pick_model() or the first CANDIDATES entry.
	        device:   only honoured together with an explicit model_id; "cuda"
	                  enables half precision / GPU placement when available.
	        """
	        # --- try restoring memory if missing (local archive / URL / HF dataset) ---
	        ok_restored = False  # must exist even when the restore call itself raises
	        try:
	            ok_restored, restore_msg = restore_curves_if_missing(CFG["CURVE_DIR"])
	            self._append_log(os.path.join(CFG["STATE_DIR"], "restore_status.log"),
	                             json.dumps({"ok":bool(ok_restored),"msg":restore_msg,"ts":time.time()})+"\n")
	        except Exception as e:
	            self._append_log(os.path.join(CFG["STATE_DIR"], "restore_error.log"), "restore: "+str(e)+"\n")
	        # --- staged ingestion chaining (run next stages automatically if enabled) ---
	        try:
	            _ing_chain = staged_ingest_chain_if_enabled(CFG["CURVE_DIR"])
	            self._append_log(os.path.join(CFG["STATE_DIR"], "ingest_chain_status.log"),
	                             json.dumps({"ts":time.time(),"chain":_ing_chain})+"\n")
	        except Exception as e:
	            self._append_log(os.path.join(CFG["STATE_DIR"], "ingest_error.log"), "chain: "+str(e)+"\n")

	        if ok_restored:
	            # Best-effort: mark the instance offline-ready when the restored store holds vectors.
	            try:
	                if CurveStore(CFG["CURVE_DIR"] if "CURVE_DIR" in CFG else CFG.get("MEMORY_DIR","./curves")).index.ntotal > 0:
	                    _mark_offline_ready()
	            except Exception:
	                pass

	        need_ingest = False
	        if CFG["FORCE_REINGEST"]:
	            need_ingest = True
	        elif not _curves_ready(CFG["CURVE_DIR"]) and CFG["BOOTSTRAP_INGEST"]:
	            need_ingest = True
	        if need_ingest:
	            try:
	                srcs = [s.strip() for s in (CFG["INGEST_SOURCES"] or "").split(",") if s.strip()] or DEFAULT_SOURCES
	                ingest_all(CFG["CURVE_DIR"], srcs, scope="general")
	                if CurveStore(CFG["CURVE_DIR"]).index.ntotal > 0:
	                    _mark_offline_ready()
	            except Exception as e:
	                self._append_log(os.path.join(CFG["CURVE_DIR"],"ingest_error.log"), str(e)+"\n")

	        self.store = CurveStore(CFG["CURVE_DIR"])
	        self.librarian = LibrarianCurve(self.store)
	        self.compiler = PromptCompiler()
	        self.engine = EngineCurve()
	        if not model_id:
	            model_id, info = pick_model() if CFG["LLM_AUTOSIZE"] else (CANDIDATES[0][0], {"device":"cpu"})
	            device = info.get("device","cpu")
	        self.model_id = model_id or CFG["MODEL_OVERRIDE"] or CANDIDATES[0][0]
	        trust = True  # NOTE(review): trust_remote_code runs code shipped with the model repo
	        kwargs = {}
	        if torch and torch.cuda.is_available() and device=="cuda":
	            # Prefer half precision on GPU
	            kwargs.update(dict(torch_dtype=torch.float16, device_map="auto"))

	        # --- Model / Tokenizer initialization (supports local transformers or Hugging Face Inference API) ---
	        use_remote = CFG["HIVE_USE_HF_INFERENCE"]
	        if use_remote:
	            # Remote path using huggingface_hub.InferenceClient
	            try:
	                from huggingface_hub import InferenceClient
	            except Exception as e:
	                raise RuntimeError(f"HIVE_USE_HF_INFERENCE=1 but huggingface_hub is missing: {e}")
	            endpoint = os.getenv("HIVE_HF_ENDPOINT","").strip() or None
	            token = CFG["HF_READ_TOKEN"] or os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or None
	            self.client = InferenceClient(model=self.model_id if endpoint is None else None,
	                                          token=token,
	                                          timeout=int(os.getenv("HIVE_HF_TIMEOUT","60") or "60"),
	                                          base_url=endpoint)

	            # define a thin wrapper so downstream code can call self.pipe(prompt, **gen_kwargs)
	            def _remote_pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, **kw):
	                # Some endpoints require a stop sequence; fall back to "Assistant:" which our compiler uses.
	                stop = kw.get("stop_sequences") or ["</s>", "Assistant:"]
	                resp = self.client.text_generation(
	                    prompt,
	                    max_new_tokens=int(max_new_tokens),
	                    temperature=float(temperature),
	                    do_sample=bool(do_sample),
	                    stop_sequences=stop,
	                    stream=False,
	                )
	                return [{"generated_text": resp}]

	            self.pipe = _remote_pipe
	            self.tok = None
	            self.model = None
	        else:
	            # Local path using transformers
	            self.tok = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=trust)
	            self.model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=trust, **kwargs)
	            self.pipe = pipeline(
	                "text-generation",
	                model=self.model,
	                tokenizer=self.tok,
	                device=0 if (torch and torch.cuda.is_available() and device=="cuda") else -1
	            )
	        self.overlay = RuntimeOverlay()
	        self.retrieval_k = 6
	        self.decoding_temperature = 0.7
	        self.web_threshold = 0.40
	        self.overlay.apply_to(self)

	        self.state_path = os.path.join(CFG["STATE_DIR"],"last_state.json")
	        if not os.path.exists(self.state_path):
	            _save_json(self.state_path, {"ok":True,"ts":time.time()})

	        # Preferred model (record exists). The record is read but not acted on here.
	        try:
	            with open(os.path.join(OPT_DIR,"preferred_model.json"),"r",encoding="utf-8") as fh:
	                pref = json.load(fh)
	            if isinstance(pref,dict) and pref.get("model_id") in CFG["OPT_MODEL_ALLOWLIST"]:
	                pass
	        except Exception:
	            pass

	        self.changes = ChangeManager(Hive)

	        try:
	            self.selfopt = SelfOptimizer(self)
	            self.selfopt.start()
	        except Exception:
	            pass

	    def summarize_for_memory(self, text:str, max_new_tokens:int=160)->str:
	        """Condense `text` (first 3000 characters) into a short bullet summary using the model."""
	        prompt=("Condense the following content into 4–6 bullet points with names, dates, numbers, and a one-line takeaway. Keep it factual.\n\n"
	                f"{text[:3000]}\n\nSummary:")
	        out = self.pipe(prompt, max_new_tokens=max_new_tokens, do_sample=False, temperature=0.01)
	        # Keep only what follows the last prompt marker in the returned text.
	        return out[0]["generated_text"].split("Summary:",1)[-1].strip()

	    def add_curve(self, text:str, meta:Dict, scope:str="general"):
	        """Store one text with its metadata in the given memory scope."""
	        self.librarian.ingest_pairs([text],[meta],scope)

	    def online_update(self, query_hint: Optional[str]=None)->Dict:
	        """Fetch the configured RSS sources, summarize unseen items and store them.

	        Returns {"ok": False, "reason": ...} when online updates are disabled or
	        the network is unavailable, else {"ok": True, "added": n}.
	        `query_hint` is accepted but not used.
	        """
	        if not CFG["ONLINE_ENABLE"]:
	            return {"ok":False,"reason":"online disabled"}
	        if not online_available(CFG["ONLINE_TIMEOUT"]):
	            return {"ok":False,"reason":"offline"}
	        seen = _load_json(ONLINE_DB, {})
	        urls = [u.strip() for u in (CFG["ONLINE_SOURCES"] or "").split(",") if u.strip()]
	        items = fetch_rss(urls, timeout=CFG["ONLINE_TIMEOUT"], limit=30)
	        added = 0
	        for it in items:
	            # De-duplicate on link + title.
	            key = hashlib.sha1(((it.get("link") or "")+(it.get("title") or "")).encode("utf-8","ignore")).hexdigest()
	            if key in seen:
	                continue
	            base = (it.get("title","")+"\n\n"+it.get("summary","")).strip()
	            summ = self.summarize_for_memory(base)
	            self.add_curve(summ, {"dataset":"online_rss","url":it.get("link"),"title":it.get("title"),"published":it.get("published")}, scope="general")
	            seen[key] = int(time.time())
	            added += 1
	        _save_json(ONLINE_DB, seen)
	        return {"ok":True,"added":added}

	    def web_update_and_store(self, query:str, max_docs:int, timeout:int)->int:
	        """Search the web for `query`, summarize each hit and store it; returns the number stored."""
	        if not (CFG["ONLINE_ENABLE"] and online_available(timeout)):
	            return 0
	        hits = web_search_snippets(query, max_results=max_docs, timeout=timeout)
	        added = 0
	        for h in hits:
	            body = (h.get("title","")+"\n\n"+h.get("body","")).strip()
	            if not body:
	                continue
	            summ = self.summarize_for_memory(body)
	            meta = {"dataset":"web_update","source":h.get("href",""),"title":h.get("title",""),"ts":time.time()}
	            self.add_curve(summ, meta, scope="general")
	            added += 1
	        return added

	    def chat(self, message:str, effective_role:str, caller_id: Optional[str],
	             k: Optional[int]=None, max_new_tokens:int=256, temperature: Optional[float]=None)->str:
	        """Answer `message` using retrieved memory, with an optional web refresh on low coverage.

	        k / temperature default to the instance's retrieval_k / decoding_temperature.
	        Returns the text after the last "Assistant:" marker, censored when
	        CFG["NO_PROFANITY"] is set.
	        """
	        online_now = NET.online_quick()
	        if not online_now:
	            NET.kick_async()
	        kk = k if k is not None else self.retrieval_k
	        temp = temperature if temperature is not None else self.decoding_temperature
	        snippets, scores = self.librarian.retrieve_scoped_with_scores(message, effective_role, caller_id, k=kk)
	        cov = coverage_score_from_snippets(snippets, scores)
	        should_try_web = (CFG["ONLINE_TRIGGER"].lower()=="auto") and CFG["ONLINE_ENABLE"] and online_now
	        if cov < self.web_threshold and should_try_web:
	            # Memory coverage is low: pull fresh web snippets into memory and retrieve again (best-effort).
	            try:
	                self.web_update_and_store(message, max_docs=int(CFG["ONLINE_MAX_RESULTS"] or 5), timeout=int(CFG["ONLINE_TIMEOUT"] or 8))
	                snippets, scores = self.librarian.retrieve_scoped_with_scores(message, effective_role, caller_id, k=kk)
	            except Exception:
	                pass
	        prompt = self.compiler.compile(message, snippets, token_budget=CFG["CTX_TOKENS"])
	        _ = self.engine.run(message, snippets)
	        out = self.pipe(prompt, max_new_tokens=max_new_tokens, do_sample=True, temperature=temp)
	        reply = out[0]["generated_text"].split("Assistant:",1)[-1].strip()
	        if CFG["NO_PROFANITY"]:
	            reply = self._PROFANITY_RE.sub("[censored]", reply)
	        return reply

	# ----------- OCR helper -----------
	def ocr_text_from_image_bgr(image_bgr)->str:
	    """Return the text recognized in a BGR image, or "" when OpenCV / Tesseract support is unavailable."""
	    if _HAVE_CV and _HAVE_TESS:
	        grayscale = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
	        recognized = pytesseract.image_to_string(grayscale)
	        return recognized if recognized else ""
	    return ""

	# --------------- UI ---------------
	# Static help text covering login modes, code-edit policy, offline/online behaviour, voice and privacy.
	# NOTE(review): presumably rendered somewhere in the UI; no use is visible in this part of the file — confirm.
	HELP=f"""
	Admin/User mode: Admins (general/super) and Owner log in with password (Owner also needs second factor). After login choose Admin or User mode.
	Owner-only code edits are enforced via Change Manager policy. Hive can sandbox, test, and propose; code writes require Owner approval (`OPT_AUTO_APPLY=1`) unless Owner applies manually.

	Offline/Online: Works fully offline from curves. If online and enabled, fetches RSS/web snippets ➡️ summarizes locally ➡️ saves to curves (persists offline).
	Voice: Faster-Whisper ASR (auto language), Piper TTS mixed-language, phonics hints (English).
	Privacy: Sensitive/first-person inputs route to user-private library; neutral info to general.
	"""

	def launch_ui():
	hive=Hive(); store=CurveStore(CFG["CURVE_DIR"]); lib=LibrarianCurve(store)

	with gr.Blocks(title="Hive 🐝 Full Merged Optimized") as demo:
	gr.Markdown(f"## {CFG['AGENT_NAME']} 🐝 Full Merged, Offline-first + Online updates + Internal Optimization")

	with gr.Row():
	login_name=gr.Textbox(label="Name or ID")
	login_pass=gr.Textbox(label="Password (admins only)", type="password")
	login_second=gr.Textbox(label="Second (owner only)", type="password")
	login_btn=gr.Button("Login")
	login_status=gr.Markdown()
	uid_state=gr.State(None); role_state=gr.State("guest"); mode_state=gr.State("user"); phonics_state=gr.State(False)

	def do_login(nm,pw,sec):
	ok, info=attempt_login(nm or "", pw or "", sec or None)
	d=_load_users(); u,_=_find_user(d, nm or "")
	role=u["role"] if u else "guest"
	prof=_load_json(ADAPT_DB,{}).get(u["id"] if u else "guest",{}); phon_on=bool(prof.get("phonics_on",False))
	return info,(u["id"] if u else None),role,"user",phon_on
	login_btn.click(do_login,[login_name,login_pass,login_second],[login_status, uid_state, role_state, mode_state, phonics_state])

	mode_picker=gr.Radio(choices=["user","admin"], value="user", label="Mode (admins/owner only)")
	def set_mode(role, pick):
	if role not in ("admin_general","admin_super","owner"): return "user"
	return pick
	mode_picker.change(set_mode, [role_state, mode_picker], [mode_state])

	with gr.Tab("Chat"):
	chat=gr.Chatbot(height=420)
	msg=gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']}")
	def talk(m, uid, role, mode, hist):
	eff = role if mode=="admin" else "user"
	reply=hive.chat(m or "", eff, caller_id=uid)
	# privacy routing
	personal = False
	if re.search(r"\b(my\|mine\|me\|I\|our\|we)\b", (m or ""), re.I) and re.search(r"\b(password\|address\|email\|phone\|ssn\|school\|kid\|medical\|bank\|card\|passport)\b", (m or ""), re.I):
	personal = True
	scope = f"user:{uid}" if (uid and personal) else "general"
	lib.ingest_pairs([m],[{"dataset":"chat"}], scope=scope)
	return hist+[[m, reply]], ""
	msg.submit(talk,[msg,uid_state,role_state,mode_state,chat],[chat,msg])

	with gr.Tab("Voice"):
	gr.Markdown("### Voice login / ASR / Mixed-language TTS / Phonics")
	mic=gr.Audio(sources=["microphone"], type="filepath", label="Speak (5–10s)")
	asr_lang=gr.Dropdown(choices=["auto","en","es","fr","de","zh"], value="auto", label="ASR language (force or auto)")
	phonics_toggle=gr.Checkbox(value=False, label="Enable Phonics assist (English pronunciation help)")
	transcribe_btn=gr.Button("Transcribe")
	transcript=gr.Textbox(label="Transcript")
	who_btn=gr.Button("Login by Voice (users only)")
	who_status=gr.Markdown()
	reply_btn=gr.Button("Reply + Speak")
	reply_text=gr.Textbox(label="Assistant Reply")
	reply_audio=gr.Audio(type="filepath", label="Assistant Voice")

	def do_transcribe(path, asr_lg, uid):
	if not path: return ""
	text=asr_transcribe(path, uid, None if asr_lg=="auto" else asr_lg)
	prof=_load_json(ADAPT_DB,{}); p=prof.get(uid or "guest",{})
	dur=librosa.get_duration(filename=path) or 0.001
	syl=len(re.findall(r"[aeiouyAEIOUY]+", text)); rate=(syl/dur) if dur > 0 else 0
	p["rate"]=0.8p.get("rate", rate)+0.2rate
	chunks=lid_chunk(text); en_len=sum(len(c) for c,l in chunks if l.startswith("en")); all_len=sum(len(c) for c,l in chunks)
	if all_len>0:
	ratio=en_len/all_len; p["codeswitch_en"]=0.8p.get("codeswitch_en",ratio)+0.2ratio
	prof[uid or "guest"]=p; _save_json(ADAPT_DB,prof)
	scope="user:"+uid if uid and ("my " in text.lower() or "I " in text) else "general"
	lib.ingest_pairs([text],[{"dataset":"voice_asr"}], scope=scope)
	return text
	transcribe_btn.click(do_transcribe,[mic,asr_lang,uid_state],[transcript])

	def do_login_voice(path):
	    """Log a plain user in by voiceprint.

	    Args:
	        path: Path of the recorded audio file; falsy when nothing was recorded.

	    Returns:
	        A 4-tuple ``(status message, user id, role, mode)``. Every failure
	        path returns ``(message, None, "guest", "user")``. Admin and owner
	        matches are refused because they must use password login.
	    """
	    def deny(message):
	        # Every refusal leaves the session as an anonymous guest.
	        return message, None, "guest", "user"

	    if not path:
	        return deny("No audio.")
	    uidv = identify_voice(path)
	    if not uidv:
	        return deny("Voice not recognized. You can enroll as a new user.")

	    d = _load_users()
	    for grp in ("users", "admins_general", "admins_super"):
	        for u in d.get(grp, []):
	            if u.get("id") != uidv:
	                continue
	            if u.get("role") in ("admin_general", "admin_super"):
	                return deny("Admin roles require password login.")
	            return f"Welcome back, {u.get('name', uidv)} (user).", uidv, "user", "user"

	    # .get() keeps a users DB without an owner record from raising KeyError.
	    if (d.get("owner") or {}).get("id") == uidv:
	        return deny("Owner must login with password + second factor.")
	    return deny("Matched unknown id; please login manually.")
	who_btn.click(do_login_voice,[mic],[who_status, uid_state, role_state, mode_state])

	def do_reply(uid, role, mode, text, phon_toggle):
	    """Answer the transcript and synthesise the answer as speech.

	    Args:
	        uid: Id of the current user (falsy ids are stored under "guest").
	        role: Role of the current user.
	        mode: UI mode; the role is only honoured when this is "admin".
	        text: Transcript to answer.
	        phon_toggle: When true, append phonics notes for English words.

	    Returns:
	        ``(answer text, path of synthesised audio)``, or ``("", None)`` when
	        there is no transcript.
	    """
	    if not text:
	        return "", None
	    eff = "user" if mode != "admin" else role
	    prompt = text
	    if phon_toggle:
	        # Words of two or more characters from chunks tagged as English.
	        english_words = [
	            w
	            for ch, lg in lid_chunk(text)
	            if lg.startswith("en")
	            for w in re.findall(r"\b[A-Za-z][A-Za-z-']+\b", ch)
	        ]
	        # Only long or capitalised words get a phonics note.
	        notes = [f"{w}: {phonics(w)}" for w in english_words if len(w) >= 6 or w[0].isupper()]
	        if notes:
	            prompt += "\n\n(Phonics)\n" + "\n".join(f"- {n}" for n in notes[:10])
	        # NOTE(review): the original nesting was lost; this assumes the
	        # preference is recorded only while the toggle is on.
	        key = uid or "guest"
	        prof = _load_json(ADAPT_DB, {})
	        p = prof.get(key, {})
	        p["phonics_on"] = True
	        prof[key] = p
	        _save_json(ADAPT_DB, prof)
	    ans = hive.chat(prompt, eff, caller_id=uid)
	    wav = synthesize_multilang(ans, CFG["TTS_LANG"])  # type: ignore
	    return ans, wav
	reply_btn.click(do_reply,[uid_state, role_state, mode_state, transcript, phonics_toggle],[reply_text, reply_audio])

	with gr.Accordion("Voice enrollment (add your voiceprint)", open=False):
	enroll_audio=gr.Audio(sources=["microphone"], type="filepath", label="Record 5–10s")
	enroll_btn=gr.Button("Enroll voice for current user"); enroll_status=gr.Markdown()
	def do_enroll(uid, path):
	    """Store a voiceprint for the current user.

	    Args:
	        uid: Id of the user to enroll; must be truthy.
	        path: Path of the recorded audio file; must be truthy.

	    Returns:
	        A status message for the UI.
	    """
	    if not uid:
	        return "Login or specify user first."
	    if not path:
	        return "No audio."
	    enroll_voice(uid, path)
	    return "Voice enrolled."
	enroll_btn.click(do_enroll,[uid_state, enroll_audio],[enroll_status])

	with gr.Accordion("New user by voice (no password)", open=False):
	nu_audio=gr.Audio(sources=["microphone"], type="filepath", label="Record 5–10s")
	nu_name=gr.Textbox(label="Your name")
	nu_lang=gr.Dropdown(choices=["en","es","fr","de","zh"], value="en", label="Preferred language")
	nu_btn=gr.Button("Create user from my voice"); nu_status=gr.Markdown()
	def do_new_user(path, name, lang):
	    """Create a password-less user and enroll the supplied voice sample.

	    Args:
	        path: Path of the recorded audio file.
	        name: Display name; surrounding whitespace is stripped and a blank
	            name is rejected.
	        lang: Preferred language code stored in the user's prefs.

	    Returns:
	        A status message for the UI.
	    """
	    name = (name or "").strip()
	    if not path or not name:
	        return "Provide audio and a name."

	    d = _load_users()
	    # Ids are timestamp-based; bump the stamp so two sign-ups in the same
	    # second cannot share an id (and overwrite each other's voiceprint).
	    taken = {
	        u.get("id")
	        for grp in ("users", "admins_general", "admins_super")
	        for u in d.get(grp, [])
	    }
	    stamp = int(time.time())
	    while f"user:{stamp}" in taken:
	        stamp += 1
	    uid = f"user:{stamp}"

	    entry = {
	        "id": uid,
	        "name": name,
	        "role": "user",
	        "pass": "",
	        "prefs": {"activation_names": [CFG['AGENT_NAME']], "language": lang},
	    }
	    d.setdefault("users", []).append(entry)
	    _save_json(USERS_DB, d)
	    enroll_voice(uid, path)
	    return f"Created user {name} ({uid}) with enrolled voice."
	nu_btn.click(do_new_user,[nu_audio, nu_name, nu_lang],[nu_status])

	with gr.Tab("Online & Wi-Fi"):
	    gr.Markdown("### Auto-connect to known Wi-Fi (non-blocking) and fetch online updates")
	    wifi_status=gr.Markdown("Wi-Fi: checking...")
	    connect_now=gr.Button("Try auto-connect now (non-blocking)")
	    online_now=gr.Button("Fetch updates now"); online_status=gr.Markdown()

	    def _start_autoconnect():
	        # Start the connector and report immediately, whatever it returns.
	        NET.kick_async()
	        return "Auto-connect started in background."

	    def _fetch_updates():
	        # Reuse the shared `hive` instance that the other callbacks use,
	        # rather than constructing a fresh Hive() on every click.
	        added = hive.online_update().get("added", 0)
	        return "Added %s new summaries to curves." % added

	    connect_now.click(_start_autoconnect, [], [wifi_status])
	    online_now.click(_fetch_updates, [], [online_status])

	with gr.Tab("Help"): gr.Markdown(HELP)

	# ------ Admin Controls (no separate tab; visible in Admin mode) ------
	with gr.Accordion("Admin Controls (switch to Admin mode to enable)", open=False, visible=True) as admin_controls:
	admin_info=gr.Markdown("Switch to Admin mode above to use these tools.")
	target=gr.Textbox(label="Target name or id")
	new_name=gr.Textbox(label="New name")
	new_pass=gr.Textbox(label="New password")
	new_role=gr.Dropdown(choices=["owner","admin_super","admin_general","user"], value="user", label="New role")
	add_name=gr.Textbox(label="Add: name")
	add_role=gr.Dropdown(choices=["admin_super","admin_general","user"], value="user", label="Add role")
	add_pass=gr.Textbox(label="Add password (admins only)")
	add_btn=gr.Button("Add user/admin")
	rename_btn=gr.Button("Rename")
	pass_btn=gr.Button("Change password")
	role_btn=gr.Button("Change role")
	out=gr.Markdown()

	def is_admin(mode, role):
	    """Return True only in Admin mode with an admin-capable role."""
	    if mode != "admin":
	        return False
	    return role in ("admin_general", "admin_super", "owner")

	def do_add(mode, role, caller, nm, rl, pw):
	    """Add a user or admin account on behalf of the logged-in admin.

	    Args:
	        mode: UI mode; must be "admin".
	        role: Role held by the session.
	        caller: Name or id of the logged-in admin.
	        nm: Name of the new account; blank names are rejected.
	        rl: Role of the new account.
	        pw: Password; required for every role except "user", ignored for
	            "user".

	    Returns:
	        A status message for the UI.
	    """
	    if not is_admin(mode, role):
	        return "Switch to Admin mode to use this."
	    d = _load_users()
	    cu, _ = _find_user(d, caller or "")
	    if not cu:
	        return "Login first as admin."
	    if rl not in PERMS.get(cu["role"], {}).get("can_add", []):
	        return f"{cu['role']} cannot add {rl}."

	    nm = (nm or "").strip()
	    if not nm:
	        return "Provide a name."
	    # An admin account with an empty password must not be created.
	    if rl != "user" and not pw:
	        return "Password required for admin roles."

	    uid = f"{rl}:{int(time.time())}"
	    # NOTE(review): the password is stored as given; hashing it would have
	    # to be coordinated with the login code, which is not visible here.
	    entry = {
	        "id": uid,
	        "name": nm,
	        "role": rl,
	        "pass": pw if rl != 'user' else "",
	        "prefs": {"activation_names": [CFG["AGENT_NAME"]], "language": "en"},
	    }
	    if rl == "owner":
	        d["owner"] = entry
	    else:
	        group = {"admin_super": "admins_super", "admin_general": "admins_general"}.get(rl, "users")
	        d.setdefault(group, []).append(entry)
	    _save_json(USERS_DB, d)
	    return f"Added {rl}: {nm}"
	add_btn.click(do_add, [mode_state, role_state, uid_state, add_name, add_role, add_pass], [out])

	def do_rename(mode, role, caller, tgt, nm):
	    """Rename an account if the caller may edit the target's profile.

	    Args:
	        mode: UI mode; must be "admin".
	        role: Role held by the session.
	        caller: Name or id of the logged-in admin.
	        tgt: Name or id of the account to rename.
	        nm: New name; blank names are rejected.

	    Returns:
	        A status message for the UI.
	    """
	    if not is_admin(mode, role):
	        return "Switch to Admin mode to use this."
	    d = _load_users()
	    u, _ = _find_user(d, tgt or "")
	    if not u:
	        return "Target not found."
	    cu, _ = _find_user(d, caller or "")
	    if not cu:
	        return "Login first."
	    if u["role"] not in PERMS.get(cu["role"], {}).get("can_edit_profile_of", []):
	        return "Not allowed."
	    # Accounts are looked up by name or id, so a blank name is refused.
	    nm = (nm or "").strip()
	    if not nm:
	        return "Provide a new name."
	    u["name"] = nm
	    _save_json(USERS_DB, d)
	    return "Renamed."
	rename_btn.click(do_rename,[mode_state, role_state, uid_state, target, new_name],[out])

	def do_pass(mode, role, caller, tgt, pw):
	    """Change an account's password if the caller may edit its profile.

	    Args:
	        mode: UI mode; must be "admin".
	        role: Role held by the session.
	        caller: Name or id of the logged-in admin.
	        tgt: Name or id of the account to update.
	        pw: New password; may be blank only for plain "user" accounts.

	    Returns:
	        A status message for the UI.
	    """
	    if not is_admin(mode, role):
	        return "Switch to Admin mode to use this."
	    d = _load_users()
	    u, _ = _find_user(d, tgt or "")
	    if not u:
	        return "Target not found."
	    cu, _ = _find_user(d, caller or "")
	    if not cu:
	        return "Login first."
	    if u["role"] not in PERMS.get(cu["role"], {}).get("can_edit_profile_of", []):
	        return "Not allowed."
	    # Plain users are created with an empty password elsewhere in this UI,
	    # so blank stays legal for them; admin and owner accounts need one.
	    if not pw and u["role"] != "user":
	        return "Password required for admin roles."
	    u["pass"] = pw or ""
	    _save_json(USERS_DB, d)
	    return "Password changed."
	pass_btn.click(do_pass,[mode_state, role_state, uid_state, target, new_pass],[out])

	def do_role(mode, role, caller, tgt, rl):
	    """Move an account to a different role.

	    Args:
	        mode: UI mode; must be "admin".
	        role: Role held by the session.
	        caller: Name or id of the logged-in admin.
	        tgt: Name or id of the account to change.
	        rl: Role to assign.

	    Returns:
	        A status message for the UI.
	    """
	    if not is_admin(mode, role):
	        return "Switch to Admin mode to use this."
	    d = _load_users()
	    u, _ = _find_user(d, tgt or "")
	    if not u:
	        return "Target not found."
	    cu, _ = _find_user(d, caller or "")
	    if not cu:
	        return "Login first."

	    # Roles each caller role is allowed to hand out.
	    allowed_new = {
	        "owner": ["owner", "admin_super", "admin_general", "user"],
	        "admin_super": ["admin_general", "user"],
	        "admin_general": ["admin_general", "user"],
	    }.get(cu["role"], [])
	    if u["role"] not in PERMS.get(cu["role"], {}).get("can_edit_role_of", []) or rl not in allowed_new:
	        return f"Not allowed to set {rl}."

	    # Demoting the current owner would leave the same record both in
	    # d["owner"] and in a group list, with a non-owner role in the owner slot.
	    owner = d.get("owner") or {}
	    if rl != "owner" and (u is owner or owner.get("id") == u.get("id")):
	        return "Owner cannot be demoted; transfer ownership to another account first."

	    # Drop the account from every group before re-filing it.
	    for grp in ("admins_super", "admins_general", "users"):
	        d[grp] = [x for x in d.get(grp, []) if x["id"] != u["id"]]

	    if rl == "owner":
	        # NOTE(review): this replaces the previous owner record outright;
	        # confirm whether the old owner should be kept as an admin.
	        d["owner"] = u
	        u["role"] = "owner"
	    elif rl == "admin_super":
	        d["admins_super"].append(u)
	        u["role"] = "admin_super"
	    elif rl == "admin_general":
	        d["admins_general"].append(u)
	        u["role"] = "admin_general"
	    else:
	        d["users"].append(u)
	        u["role"] = "user"
	    _save_json(USERS_DB, d)
	    return f"Role set to {rl}."
	role_btn.click(do_role,[mode_state, role_state, uid_state, target, new_role],[out])

	# ------ Internal Optimization controls (Owner-gated) ------
	gr.Markdown("### Internal Optimization (Change Manager)")
	prop_kind=gr.Dropdown(choices=["model","package","code"], value="model", label="Proposal type")
	prop_name=gr.Textbox(label="Model ID / Package Name")
	prop_ver=gr.Textbox(label="Package version (optional)")
	prop_reason=gr.Textbox(label="Why this change?")
	prop_patch=gr.Code(label="Code patch (for 'code' proposals): paste full replacement or diff")
	propose_btn=gr.Button("Propose"); test_btn=gr.Button("Test in sandbox"); apply_btn=gr.Button("Apply (policy-checked)")
	opt_out=gr.Markdown()
	_last = {"id": None, "obj": None}
	def do_propose(kind,name,ver,reason,patch):
	    """Register a change proposal and remember it as the latest one.

	    Empty form fields are passed on as empty strings. The proposal id and
	    object are kept in ``_last`` for the test and apply buttons.
	    """
	    proposal = ChangeProposal(
	        kind=kind,
	        name=name or "",
	        version=ver or "",
	        reason=reason or "",
	        patch_text=patch or "",
	    )
	    pid = hive.changes.propose(proposal)
	    _last.update(id=pid, obj=proposal)
	    return f"Proposed {kind}: {name or '(code patch)'} (id:{pid})"
	def do_test():
	    """Run the sandbox comparison for the latest proposal; return JSON text."""
	    proposal = _last["obj"]
	    if not proposal:
	        return "No proposal in memory. Submit one first."
	    res = hive.changes.test_and_compare(_last["id"], proposal)
	    return json.dumps(res, indent=2)
	def do_apply(role, mode):
	    """Re-test the latest proposal and apply it if policy allows.

	    Only ``admin_super`` or ``owner`` in Admin mode may apply. Code patches
	    from anyone but the owner wait for approval unless ``OPT_AUTO_APPLY``
	    is set.
	    """
	    privileged = role in ("admin_super", "owner")
	    if not privileged or mode != "admin":
	        return "Only admin_super or owner may apply."
	    proposal = _last["obj"]
	    if not proposal:
	        return "No proposal loaded."
	    res = hive.changes.test_and_compare(_last["id"], proposal)
	    if not res.get("ok"):
	        return f"Test failed: {res.get('reason','unknown')}"
	    needs_owner = proposal.kind == "code" and role != "owner" and not CFG["OPT_AUTO_APPLY"]
	    if needs_owner:
	        return "Awaiting Owner approval for code changes."
	    ok, msg = hive.changes.apply(res)
	    if ok:
	        return msg
	    return f"Apply failed: {msg}"
	propose_btn.click(do_propose, [prop_kind,prop_name,prop_ver,prop_reason,prop_patch],[opt_out]) # type: ignore
	test_btn.click(lambda: do_test(), [], [opt_out])
	apply_btn.click(do_apply, [role_state, mode_state], [opt_out])

	demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", "7860")))

	# ----------- entry -----------
	if __name__=="__main__":
	    if ENV("HIVE_LAUNCH_UI","1",bool):
	        launch_ui()
	    else:
	        # Minimal REPL used when the UI is disabled.
	        h=Hive(); print("CLI mode. Type and press Enter (Ctrl+C to exit).")
	        try:
	            while True:
	                s=input("> ").strip()
	                if not s: continue
	                print(h.chat(s, effective_role="user", caller_id=None))
	        except (KeyboardInterrupt, EOFError):
	            # input() raises EOFError when stdin is closed or not interactive
	            # (piped input, containers); exit quietly as for Ctrl+C.
	            pass
	--- a/app.py
	+++ b/app.py
	@@ -1,783 +1,1006 @@
	-# === BOOTSTRAP + SAFE DATASETS SHIM (must be line 1) =========================
	-import os, sys, subprocess, importlib.util, importlib.machinery, types
	-import sys # make sure this import is near the top of your file
	-# near the other imports
	-import os
	-
	-# Defaults that work for both CLI and UI; can be overridden via env if you want
	-DEFAULT_EFFECTIVE_ROLE = os.getenv("EFFECTIVE_ROLE", "user")
	-DEFAULT_CALLER_ID_CLI = os.getenv("CALLER_ID_CLI", "cli")
	-DEFAULT_CALLER_ID_UI = os.getenv("CALLER_ID_UI", "ui")
	-
	-def chat_once(hive: "Hive", text: str, *, role: str = DEFAULT_EFFECTIVE_ROLE, caller_id: str = DEFAULT_CALLER_ID_CLI):
	- """
	- Thin wrapper so all calls to Hive.chat() include the required args.
	- """
	- return hive.chat(text, effective_role=role, caller_id=caller_id)
	-
	-try:
	- import gradio as gr
	-except Exception as e:
	- gr = None
	- print(f"[ui] Gradio import failed: {e}")
	-# --- SAFE loader for SentenceTransformer to avoid "meta tensor" crashes ---
	-import os
	-from sentence_transformers import SentenceTransformer
	-
	-def load_sentence_transformer_safely(model_name: str, device: str = "cpu"):
	- """
	- Load a SentenceTransformer without going through an accelerate/empty-weights
	- path that can yield 'meta' tensors. Never call .to(device) after creation.
	- """
	- # Guardrails — avoid accelerate meta in some containers
	- os.environ.pop("ACCELERATE_USE_DEEPSPEED", None)
	- os.environ.pop("ACCELERATE_MIXED_PRECISION", None)
	-
	- # First attempt: construct directly on the requested device
	- try:
	- st = SentenceTransformer(
	- model_name,
	- device=device, # <-- construct directly on the device
	- trust_remote_code=True, # some ST models need this
	- )
	- return st
	- except NotImplementedError as e:
	- # If we still hit the meta-copy error, load on CPU and keep it there.
	- print(f"[warn] ST meta-tensor error on device={device}: {e}. Falling back to CPU.")
	- st = SentenceTransformer(
	- model_name,
	- device="cpu",
	- trust_remote_code=True,
	- )
	- return st
	-
	-def _read_line(prompt="> "):
	- # Avoid prompting when there’s no interactive terminal (e.g., Hugging Face space)
	- if not sys.stdin or not sys.stdin.isatty():
	- prompt = ""
	- try:
	- return input(prompt)
	- except EOFError:
	- return None
	-def _pip_install(pkgs):
	- try:
	- subprocess.check_call([sys.executable, "-m", "pip", "install", "-q"] + pkgs)
	- except Exception as e:
	- print("Bootstrap warning:", e, flush=True)
	-
	-def _datasets_ok():
	- try:
	- spec = importlib.util.find_spec("datasets")
	- if spec is None:
	- return False
	- import datasets as _ds
	- if getattr(_ds, "__spec__", None) is None:
	- return False
	- for name in ("Dataset","DatasetDict","IterableDataset","IterableDatasetDict","Value"):
	- if not hasattr(_ds, name):
	- return False
	- return True
	- except Exception:
	- return False
	-
	-# Try to ensure a real, recent datasets exists
	-if not _datasets_ok():
	- _pip_install(["datasets>=2.16,<3"])
	-# If still not OK, hard-stub it so imports don’t crash
	-if not _datasets_ok():
	- ds = types.ModuleType("datasets")
	- ds.__file__ = "<stub:datasets>"
	- ds.__path__ = []
	- ds.__spec__ = importlib.machinery.ModuleSpec("datasets", loader=None, is_package=True)
	- class _Base: # simple no-op base
	- def __init__(self, a, *k): pass
	- class Dataset(_Base):
	- def map(self,a,*k): return self
	- def filter(self,a,*k): return self
	- def select(self,a,*k): return self
	- def shuffle(self,a,*k): return self
	- def train_test_split(self,a,*k): return {"train": self, "test": self}
	- class IterableDataset(_Base): pass
	- class DatasetDict(dict): pass
	- class IterableDatasetDict(dict): pass
	- class Value:
	- def __init__(self, dtype="string"): self.dtype = dtype
	- ds.Dataset = Dataset
	- ds.DatasetDict = DatasetDict
	- ds.IterableDataset = IterableDataset
	- ds.IterableDatasetDict = IterableDatasetDict
	- ds.Value = Value
	- sys.modules["datasets"] = ds
	-# ============================================================================
	-
	-# === LIGHTWEIGHT ST IMPORT + FALLBACK ========================================
	-# IMPORTANT: avoid "from sentence_transformers import SentenceTransformer"
	-# (that import triggers cross_encoder -> datasets)
	-try:
	- from sentence_transformers.SentenceTransformer import SentenceTransformer
	- _ST_AVAILABLE = True
	-except Exception as _e:
	- print("[WARN] sentence-transformers direct import failed:", _e, flush=True)
	- SentenceTransformer = None
	- _ST_AVAILABLE = False
	-
	-def get_embedder(model_name: str = "sentence-transformers/all-MiniLM-L6-v2", device: str \| None = None):
	- """
	- Returns a SentenceTransformer if available, otherwise a pure-Transformers
	- fallback with mean pooling and L2 normalization (API-compatible encode()).
	- """
	- if _ST_AVAILABLE:
	- try:
	- return SentenceTransformer(model_name, device=device)
	- except Exception as e:
	- print("[WARN] ST model init failed, falling back to plain Transformers:", e, flush=True)
	-
	- # Fallback: plain Transformers embedder
	- from transformers import AutoTokenizer, AutoModel
	- import torch
	-
	- tok = AutoTokenizer.from_pretrained(model_name)
	- mdl = AutoModel.from_pretrained(model_name)
	- if device:
	- mdl.to(device)
	- mdl.eval()
	-
	- class _Embedder:
	- def encode(self, texts, convert_to_tensor=False, normalize_embeddings=True, batch_size=32, **kw):
	- if isinstance(texts, str):
	- texts = [texts]
	- out_chunks = []
	- with torch.no_grad():
	- for i in range(0, len(texts), batch_size):
	- batch = texts[i:i+batch_size]
	- enc = tok(batch, padding=True, truncation=True, return_tensors="pt")
	- if device:
	- enc = {k: v.to(device) for k, v in enc.items()}
	- last_hidden = mdl(**enc).last_hidden_state # [B, T, H]
	- mask = enc["attention_mask"].unsqueeze(-1) # [B, T, 1]
	- pooled = (last_hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9) # mean pooling
	- if normalize_embeddings:
	- pooled = torch.nn.functional.normalize(pooled, p=2, dim=1)
	- out_chunks.append(pooled.detach().cpu())
	- embs = torch.cat(out_chunks, dim=0)
	- return embs if convert_to_tensor else embs.numpy()
	-
	- return _Embedder()
	-# ============================================================================
	-
	-
	-# -- coding: utf-8 --
	-"""
	-Hive — FULL COMBINED (Original-first, Tutor+ • SAFE v5 BACKGROUND)
	-- Embeds your fixed base (as `hive_base`)
	-- Tutor+ (retrieval + gentle phonics/CEFR/essay review)
	-- Instant boot: `datasets` stubbed at import
	-- Background-only staged condensed-curves builder:
	- • Streams dataset text -> embeds -> FAISS -> prunes caches
	- • No UI elements; runs automatically when needed and on a schedule
	-"""
	-
	-import os, sys, types, threading, time, base64, json, re, shutil
	-from pathlib import Path
	-from typing import List, Dict, Optional
	-
	-# ---------- Storage policy ----------
	-MAX_GB = float(os.getenv("HIVE_MAX_CACHE_GB", "8")) # cache budget (GB)
	-HF_HOME = Path(os.getenv("HF_HOME", str(Path.home() / ".cache" / "huggingface")))
	-TRANSFORMERS_CACHE = Path(os.getenv("TRANSFORMERS_CACHE", str(HF_HOME / "transformers")))
	-DATASETS_CACHE = Path(os.getenv("HF_DATASETS_CACHE", str(HF_HOME / "datasets")))
	-ALLOW_KEEP = [os.getenv("HIVE_MODEL_ID","TinyLlama/TinyLlama-1.1B-Chat-v1.0").split("/")[-1],
	- os.getenv("HIVE_EMB_ID","sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2").split("/")[-1]]
	-
	-def _dir_size_bytes(p: Path) -> int:
	- total = 0
	- if not p.exists(): return 0
	- for root, _, files in os.walk(p):
	- for f in files:
	- try: total += (Path(root)/f).stat().st_size
	- except Exception: pass
	- return total
	-
	-def _prune_path_to_limit(root: Path, max_bytes: int, allow_keep=None, log=None):
	- if not root.exists(): return
	- files = []
	- for rp, _, fns in os.walk(root):
	- for fn in fns:
	- fp = Path(rp)/fn
	- try:
	- st = fp.stat()
	- rel = str(fp).lower()
	- if allow_keep and any(k.lower() in rel for k in allow_keep):
	- continue
	- files.append((st.st_mtime, st.st_size, fp))
	- except Exception:
	- pass
	- files.sort() # oldest first
	- size = _dir_size_bytes(root)
	- if log: log(f"[prune] {root} size: {size/1e9:.2f} GB; target: {max_bytes/1e9:.2f} GB")
	- i = 0
	- while size > max_bytes and i < len(files):
	- _, s, fp = files[i]
	- try:
	- fp.unlink()
	- size -= s
	- if log: log(f"[prune] delete {fp} (-{s/1e6:.1f} MB)")
	- except Exception as e:
	- if log: log(f"[prune] skip {fp}: {e}")
	- i += 1
	-
	-def enforce_cache_budget(log=None):
	- max_bytes = int(MAX_GB * (1024**3))
	- for p in [TRANSFORMERS_CACHE, DATASETS_CACHE]:
	- _prune_path_to_limit(Path(p), max_bytes, allow_keep=ALLOW_KEEP, log=log)
	-
	-# ---------- Stub datasets at import for instant boot ----------
	-if os.getenv("HIVE_DISABLE_DATASETS", "1").lower() in ("1","true","yes","on"):
	- import importlib.machinery as _mach
	- ds = types.ModuleType("datasets")
	- # Provide a proper ModuleSpec so importlib.util.find_spec("datasets") does not crash
	- ds.__spec__ = _mach.ModuleSpec("datasets", loader=None)
	- ds.__path__ = [] # mark as package-like for safety
	- ds.__version__ = "0.0-stub"
	-
	- class _Empty:
	- def __iter__(self): return iter([])
	- def __getitem__(self, k): return []
	- def map(self, a, *k): return self
	- def filter(self, a, *k): return self
	- def select(self, a, *k): return self
	- def shuffle(self, a, *k): return self
	- def train_test_split(self, a, *k): return {"train": self, "test": self}
	- def to_pandas(self): import pandas as pd; return pd.DataFrame()
	-
	- # Expose symbols sentence-transformers expects to import
	- ds.Dataset = _Empty
	- ds.IterableDataset = _Empty
	- ds.DatasetDict = dict
	-
	- def _disabled_load_dataset(args, *kwargs):
	- print("[datasets] disabled via HIVE_DISABLE_DATASETS; returning empty dataset.")
	- return _Empty()
	-
	- ds.load_dataset = _disabled_load_dataset
	- sys.modules["datasets"] = ds
	-
	-os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
	-os.environ.setdefault("HIVE_DOWNLOAD_DATASETS_ON_START", "0") # handled by staged builder
	-os.environ.setdefault("HIVE_DATASETS_LIST", "wi_locness")
	-os.environ.setdefault("HIVE_BUILD_CONDENSED_CURVES_ON_START", "1")
	-os.environ.setdefault("HIVE_CURVES_TARGET_MIN", "10000") # target #items in index
	-os.environ.setdefault("HIVE_CURVES_RECHECK_SECS", "1800") # 30 minutes
	-os.environ.setdefault("HIVE_STAGE_BATCH", "128")
	-os.environ.setdefault("HIVE_STAGE_SAVE_EVERY", "512")
	-os.environ.setdefault("HIVE_STAGE_MAX_DOCS_PER_DATASET", "5000")
	-
	#!/usr/bin/env python3
	# --- BEGIN MEMORY MANIFEST (auto-updated) ---
	# (This block is auto-written by Hive to record what datasets/files
	@@ -791,15 +208,15 @@
	# + Internal Optimization Stack (Change Manager: propose ➡️ sandbox ➡️ A/B test ➡️ apply/rollback with Owner policy)
	# Upload this single file and requirements.txt to a Hugging Face Space (or run locally).
	# - python hive_full_merged_all_in_one_optimized.py
	-
	-import os, sys, re, json, time, shutil, tempfile, subprocess, platform, socket, threading, importlib, hashlib, unicodedata, urllib.request
	+
	+import os, sys, re, json, time, shutil, tempfile, subprocess, platform, socket, threading, importlib, hashlib, unicodedata, urllib.request, base64
	from dataclasses import dataclass
	from typing import Optional, List, Dict, Tuple
	-
	+
	# ----------- light bootstrap (safe) -----------
	def _ensure(pkgs):
	for p in pkgs:
	- mod = p.split("==")[0].split(">=")[0].split("[")[0]
	+ mod = p.split("==")[0].split(">=")[0].split("<=")[0].split("[")[0]
	try:
	importlib.import_module(mod)
	except Exception:
	@@ -807,13 +224,13 @@
	subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", p])
	except Exception:
	pass
	-
	+
	_ensure(["numpy>=1.24.0","psutil>=5.9.0","requests>=2.31.0","gradio>=4.44.0","sentence-transformers>=3.0.0","faiss-cpu>=1.8.0",
	"transformers>=4.44.0","accelerate>=0.33.0","datasets>=2.21.0","soundfile>=0.12.1","faster-whisper>=1.0.0","langid>=1.1.6",
	"piper-tts>=1.2.0","g2p_en>=2.1.0","librosa>=0.10.1","scikit-learn>=1.1.0","feedparser>=6.0.11","duckduckgo_search>=6.2.10",
	"keyring>=24.3.1"])
	-
	-import numpy as np, psutil, requests, feedparser, langid, librosa, gradio as gr
	+
	+import numpy as np, psutil, requests, feedparser, langid, librosa, gradio as gr, soundfile as sf
	from sentence_transformers import SentenceTransformer
	from duckduckgo_search import DDGS
	from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
	@@ -821,29 +238,29 @@
	from piper.voice import PiperVoice
	from g2p_en import G2p
	from sklearn.metrics.pairwise import cosine_similarity
	-
	+
	try:
	import torch
	except Exception:
	torch=None
	-
	+
	try:
	import faiss
	except Exception:
	subprocess.check_call([sys.executable,"-m","pip","install","--upgrade","faiss-cpu>=1.8.0"])
	import faiss
	-
	+
	# Optional vision
	try:
	import cv2; _HAVE_CV=True
	except Exception:
	_HAVE_CV=False
	try:
	- import pytesseract; _HAVE_TESS=True
	+ from PIL import Image
	+ import pytesseract; _HAVE_TESS=True and _HAVE_CV
	except Exception:
	_HAVE_TESS=False
	-
	+
	try:
	import keyring
	except Exception:
	@@ -857,7 +274,7 @@
	if cast is bool: return str(v).lower() in ("1","true","yes","on")
	if cast is int:
	try: return int(v)
	- except: return int(float(v))
	+ except (ValueError, TypeError): return int(float(v))
	return v

	CFG={
	@@ -870,14 +287,14 @@
	"HIVE_INGEST_STAGED": ENV("HIVE_INGEST_STAGED", "1", bool),
	"HIVE_INGEST_STAGE_SIZE": ENV("HIVE_INGEST_STAGE_SIZE", "3", int),
	"HIVE_INGEST_MIN_FREE_GB": ENV("HIVE_INGEST_MIN_FREE_GB", "8", int),
	- "HIVE_INGEST_NEXT": ENV("HIVE_INGEST_NEXT", "0", bool),
	+ "HIVE_INGEST_NEXT": ENV("HIVE_INGEST_NEXT", "0", bool), # run one stage this boot

	# self-edit manifest controls
	"HIVE_ALLOW_SELF_WRITE_MANIFEST": ENV("HIVE_ALLOW_SELF_WRITE_MANIFEST", "1", bool),
	"HIVE_SELF_WRITE_FILE": ENV("HIVE_SELF_WRITE_FILE", "", str),

	# memory auto-restore controls (admin memory)
	- "CURVES_AUTO_RESTORE": ENV("HIVE_CURVES_AUTO_RESTORE", "1", bool),
	+ "HIVE_CURVES_AUTO_RESTORE": ENV("HIVE_CURVES_AUTO_RESTORE", "1", bool),
	"CURVES_ARCHIVE_LOCAL": ENV("HIVE_CURVES_ARCHIVE_LOCAL", "curves.tar.gz", str),
	"CURVES_ARCHIVE_URL": ENV("HIVE_CURVES_ARCHIVE_URL", "", str),
	"CURVES_HF_DATASET": ENV("HIVE_CURVES_HF_DATASET", "", str),
	@@ -889,11 +306,11 @@
	"STATE_DIR": ENV("HIVE_STATE_DIR","./state"),
	"LAUNCH_UI": ENV("HIVE_LAUNCH_UI","1",bool),
	"LLM_AUTOSIZE": ENV("HIVE_LLM_AUTOSIZE","1",bool),
	- "LLM_MAX_VRAM_GB": ENV("HIVE_LLM_MAX_VRAM_GB","0"),
	+ "LLM_MAX_VRAM_GB": ENV("HIVE_LLM_MAX_VRAM_GB","0", int),
	"MODEL_OVERRIDE": ENV("HIVE_MODEL_ID",""),
	"CTX_TOKENS": ENV("HIVE_CTX_TOKENS","2048",int),
	- "OWNER_NAME": ENV("HIVE_OWNER_USER","Rose"),
	- "OWNER_PASS": ENV("HIVE_OWNER_PASS","Fehr2008"),
	+ "OWNER_NAME": ENV("HIVE_OWNER_USER","Rose"), # Default Owner name
	+ "OWNER_PASS": ENV("HIVE_OWNER_PASS","Fehr2008"), # Default Owner password
	"OWNER_SECOND": ENV("HIVE_OWNER_SECOND","Paulbear01"),
	"AGENT_NAME": ENV("HIVE_AGENT_NAME","Hive"),
	"NO_PROFANITY": ENV("HIVE_NO_PROFANITY","1",bool),
	@@ -907,6 +324,8 @@
	"ONLINE_MAX_RESULTS": ENV("HIVE_ONLINE_MAX_RESULTS","5",int),
	"ONLINE_TRIGGER": ENV("HIVE_ONLINE_TRIGGER","auto",str),
	# bounded self governance
	+ "HIVE_USE_HF_INFERENCE": ENV("HIVE_USE_HF_INFERENCE","0",bool),
	+ "HIVE_HF_ENDPOINT": ENV("HIVE_HF_ENDPOINT","",str),
	"ALLOW_SELF_REBOOT": ENV("HIVE_ALLOW_SELF_REBOOT","1",bool),
	"ALLOW_RUNTIME_HOTPATCH": ENV("HIVE_ALLOW_RUNTIME_HOTPATCH","1",bool),
	"AUTO_SELF_OPTIMIZE": ENV("HIVE_AUTO_SELF_OPTIMIZE","1",bool),
	@@ -914,7 +333,7 @@
	"OPT_ENABLE": ENV("HIVE_OPT_ENABLE","1",bool),
	"OPT_AUTO_APPLY": ENV("HIVE_OPT_AUTO_APPLY","0",bool), # OWNER MAY SET TO 1
	"OPT_PKG_ALLOWLIST": ENV("HIVE_OPT_PKG_ALLOWLIST","transformers,accelerate,datasets,sentence-transformers,faiss-cpu,duckduckgo_search,feedparser,requests,gradio").split(","),
	- "OPT_MODEL_ALLOWLIST": ENV("HIVE_OPT_MODEL_ALLOWLIST","meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-3B,meta-llama/Llama-3.1-8B-Instruct,meta-llama/Llama-3.1-13B-Instruct,TinyLlama/TinyLlama-1.1B-Chat-v1.0").split(","),
	+ "OPT_MODEL_ALLOWLIST": ENV("HIVE_OPT_MODEL_ALLOWLIST","meta-llama/Meta-Llama-3.1-8B-Instruct,meta-llama/Meta-Llama-3.1-70B-Instruct,TinyLlama/TinyLlama-1.1B-Chat-v1.0").split(","),
	"OPT_THRESH_LATENCY_MS": ENV("HIVE_OPT_THRESH_LATENCY_MS","0",int),
	"OPT_THRESH_TOKS_PER_S": ENV("HIVE_OPT_THRESH_TOKS_PER_S","0",float),
	"OPT_THRESH_QUALITY": ENV("HIVE_OPT_THRESH_QUALITY","0.02",float),
	@@ -944,22 +363,20 @@

	CANDIDATES=[
	("TinyLlama/TinyLlama-1.1B-Chat-v1.0", 0),
	- ("meta-llama/Llama-3.2-1B",0),
	- ("meta-llama/Llama-3.2-3B",0),
	- ("meta-llama/Llama-3.1-8B-Instruct",12),
	- ("meta-llama/Llama-3.1-13B-Instruct",20)
	+ ("meta-llama/Meta-Llama-3.1-8B-Instruct",12),
	+ ("meta-llama/Meta-Llama-3.1-70B-Instruct",100)
	]
	def pick_model()->Tuple[str,dict]:
	if CFG["MODEL_OVERRIDE"]:
	return CFG["MODEL_OVERRIDE"], {"device":"cuda" if _has_gpu_env() else "cpu"}
	- max_vram=int(CFG["LLM_MAX_VRAM_GB"] or "0")
	+ max_vram=CFG["LLM_MAX_VRAM_GB"]
	if _has_gpu_env():
	- for mid,need in CANDIDATES:
	+ for mid,need in reversed(CANDIDATES):
	if need and (max_vram==0 or need<=max_vram):
	return mid, {"device":"cuda"}
	else:
	ram=psutil.virtual_memory().total/(1024**3)
	- for mid,need in CANDIDATES:
	+ for mid,need in reversed(CANDIDATES):
	if need==0 and ram>=6: return mid, {"device":"cpu"}
	return "TinyLlama/TinyLlama-1.1B-Chat-v1.0", {"device":"cpu"}

	@@ -993,7 +410,7 @@
	for i in I[0]:
	if 0<=i<len(lines):
	try: out.append(json.loads(lines[i]))
	- except: pass
	+ except json.JSONDecodeError: pass
	return out
	def search_with_scores(self, query:str, k:int=6):
	if self.index.ntotal==0: return [], []
	@@ -1005,7 +422,7 @@
	if 0<=idx<len(lines):
	try:
	metas.append(json.loads(lines[idx]))
	- scores.append(float(max(0.0, min(1.0, sc))))
	+ scores.append(float(max(0.0, min(1.0, sc if sc is not None else 0.0))))
	except: pass
	return metas, scores

	@@ -1025,7 +442,7 @@
	# ----------- HF Datasets bootstrap -----------
	DEFAULT_SOURCES=["jhu-clsp/jflue","bea2019st/wi_locness","fce-m2109/mascorpus","rajpurkar/squad_v2",
	"OpenRL/daily_dialog","tetti/spelling-dataset-extended","Helsinki-NLP/opus-100","facebook/flores",
	- "HuggingFaceH4/Multilingual-Thinking","bigscience/xP3","allenai/sciq","allenai/c4",
	+ "HuggingFaceH4/no_robots","bigscience/xP3","allenai/sciq","allenai/c4",
	"mozilla-foundation/common_voice_17_0","bene-ges/en_cmudict","openslr/librispeech_asr","conceptnet5/conceptnet5","grammarly/coedit"]

	def _iter_text(dataset_name:str, split="train"):
	@@ -1033,7 +450,7 @@
	try:
	ds=load_dataset(dataset_name, split=split, streaming=True)
	except Exception:
	- ds=load_dataset(dataset_name, split=split)
	+ ds=load_dataset(dataset_name, split=split, trust_remote_code=True)
	for ex in ds:
	text = ex.get("text") or ex.get("sentence") or ex.get("content") or ex.get("question")
	if not text:
	@@ -1077,7 +494,7 @@
	if len(bt)>=caps["batch"]:
	lib.ingest_pairs(bt,bm,scope); count+=len(bt); count_total+=len(bt); bt,bm=[],[]
	if count>=caps["max_docs"]: break
	- if bt: lib.ingest_pairs(bt,bm,scope); count+=len(bt); count_total+=len(bt)
	+ if bt: lib.ingest_pairs(bt,bm,scope); count+=len(bt); count_total+=len(bt); bt,bm=[],[]
	with open(logf,"a",encoding="utf-8") as f: f.write(json.dumps({"dataset":ds,"ingested":count})+"\n")
	except Exception as e:
	with open(logf,"a",encoding="utf-8") as f: f.write(json.dumps({"dataset":ds,"error":str(e)})+"\n")
	@@ -1109,7 +526,7 @@
	for e in f.entries[:limit]:
	items.append({"title":e.get("title",""),"link":e.get("link",""),"summary":e.get("summary") or e.get("description",""),"published":e.get("published") or e.get("updated",""),"source":u})
	except Exception:
	- pass
	+ # consider logging this error
	+ pass
	return items

	def web_search_snippets(query:str, max_results:int=5, timeout:int=8)->list:
	@@ -1120,7 +537,7 @@
	if r and r.get("body"):
	out.append({"title":r.get("title",""),"href":r.get("href",""),"body":r.get("body","")})
	except Exception:
	- pass
	+ # consider logging this error
	+ pass
	return out

	# ----------- RBAC / users / lockouts -----------
	@@ -1202,11 +619,11 @@
	"zh": ("https://github.com/rhasspy/piper/releases/download/v0.0.2/zh_CN-huayan-low.onnx",
	"https://github.com/rhasspy/piper/releases/download/v0.0.2/zh_CN-huayan-low.onnx.json"),
	}
	-def _download(url,dst):
	+def _download(url,dst, timeout=30):
	if os.path.exists(dst): return dst
	- os.makedirs(os.path.dirname(dst),exist_ok=True); urllib.request.urlretrieve(url,dst); return dst
	+ os.makedirs(os.path.dirname(dst),exist_ok=True); urllib.request.urlretrieve(url,dst); return dst # TODO: add timeout
	_TTS_CACHE={}
	-def get_tts(lang="en"):
	+def get_tts(lang="en") -> PiperVoice:
	lang=lang if lang in PIPER_MODELS else "en"
	if lang in _TTS_CACHE: return _TTS_CACHE[lang]
	mu,cu=PIPER_MODELS[lang]; m=_download(mu,f"./models/piper/{os.path.basename(mu)}"); c=_download(cu,f"./models/piper/{os.path.basename(cu)}")
	@@ -1216,13 +633,13 @@
	y, sr = librosa.load(path, sr=16000)
	mf=librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
	return mf.mean(axis=1)
	-def enroll_voice(uid:str, path:str)->bool:
	+def enroll_voice(uid:str, path:str) -> bool:
	db=_load_json(VOICES_DB, {}); db[uid]=_embed_mfcc(path).astype(float).tolist(); _save_json(VOICES_DB, db); return True
	-def identify_voice(path:str, threshold:float=0.70)->Optional[str]:
	+def identify_voice(path:str, threshold:float=0.70) -> Optional[str]:
	db=_load_json(VOICES_DB, {});
	if not db: return None
	emb=_embed_mfcc(path).reshape(1,-1)
	- keys=list(db.keys()); mats=np.vstack([np.array(db[k]).reshape(1,-1) for k in keys]).squeeze()
	+ keys=list(db.keys()); mats=np.array([db[k] for k in keys])
	sims=cosine_similarity(emb, mats)[0]; i=int(np.argmax(sims)); return keys[i] if sims[i]>=threshold else None

	_BASIC={'a':'a as in apple /æ/','e':'e as in elephant /ɛ/','i':'i as in igloo /ɪ/','o':'o as in octopus /ɒ/','u':'u as in umbrella /ʌ/',
	@@ -1257,10 +674,9 @@
	sr=None; mix=None
	for ch, lg in chunks or [(text, fallback)]:
	lg2=lg if lg in PIPER_MODELS else fallback
	- v=get_tts(lg2); aud=v.synthesize(ch)
	+ v=get_tts(lg2); aud, _ = v.synthesize(ch)
	if sr is None: sr=v.sample_rate
	mix = aud if mix is None else np.concatenate([mix,aud])
	outp=os.path.join(tempfile.gettempdir(), f"hive_tts_{int(time.time())}.wav")
	- import soundfile as sf
	sf.write(outp, mix if mix is not None else np.zeros(1), sr or 22050, subtype="PCM_16")
	return outp

	@@ -1309,7 +725,7 @@
	self.router_rules=[]
	def choose_route(self, msg:str)->str:
	for pat in self.router_rules or []:
	- if pat.search(msg):
	+ if isinstance(pat, re.Pattern) and pat.search(msg):
	s=pat.pattern.lower()
	if "translation" in s: return "translation"
	if "vision" in s: return "vision"
	@@ -1341,7 +757,7 @@
	out.sort(key=lambda x: x.get("priority",0), reverse=True); return out
	def _get_saved_password(ssid:str)->Optional[str]:
	if keyring:
	- try: return keyring.get_password("hive_wifi", ssid)
	+ try: return keyring.get_password("hive_wifi", ssid) or ""
	except Exception: return None
	return None
	def _connect_linux(ssid, password, timeout=12)->Tuple[bool,str]:
	@@ -1404,7 +820,7 @@
	now=time.time()
	if now-self.last_attempt<self.cooldown_s: return
	self.last_attempt=now
	- if self.thread and self.thread.is_alive(): return
	+ if self.thread and self.thread.is_alive(): return # type: ignore
	self.thread=threading.Thread(target=self.run_once, daemon=True); self.thread.start()

	NET=AutoConnector()
	@@ -1413,7 +829,7 @@
	def coverage_score_from_snippets(snippets: list, scores: list) -> float:
	if not snippets or not scores: return 0.0
	s = sorted(scores, reverse=True)[:3]
	- base = sum(s)/len(s)
	+ base = sum(s)/len(s) if s else 0.0
	bonus = min(0.15, 0.03 * len(snippets))
	return float(max(0.0, min(1.0, base + bonus)))

	@@ -1435,7 +851,7 @@
	hive.decoding_temperature=float(o.get("temperature",0.7)); hive.decoding_temperature=max(0.0,min(1.5,hive.decoding_temperature))
	rr=o.get("router_rules") or []
	if isinstance(rr,list):
	- try: hive.engine.router_rules=[re.compile(pat,re.I) for pat in rr if isinstance(pat,str)]
	+ try: hive.engine.router_rules=[re.compile(pat,re.I) for pat in rr if isinstance(pat,str) and pat]
	except re.error: hive.engine.router_rules=[]
	t=o.get("web_threshold",None); hive.web_threshold=float(t) if isinstance(t,(int,float)) else 0.40
	def patch(self, patch:dict, actor_role:str="hive")->Tuple[bool,str]:
	@@ -1468,7 +884,7 @@
	if not CFG["AUTO_SELF_OPTIMIZE"]: continue
	vm=psutil.virtual_memory(); ovr={}
	if vm.percent>88:
	- ovr["token_budget"]=max(512,int(0.75*(self.hive.compiler.override_budget or CFG["CTX_TOKENS"])))
	+ ovr["token_budget"]=max(512,int(0.75*(self.hive.compiler.override_budget or CFG["CTX_TOKENS"]))) # type: ignore
	ovr["temperature"]=max(0.2,self.hive.decoding_temperature-0.1)
	lat=(sum(self.hive.engine.stats["latency_ms"][-10:])/max(1,len(self.hive.engine.stats["latency_ms"][-10:]))) if self.hive.engine.stats["latency_ms"] else 0
	if lat>1200: ovr["retrieval_k"]=max(3,self.hive.retrieval_k-1)
	@@ -1520,7 +936,7 @@
	for p in prompts:
	t0=time.time()
	h=hive_factory()
	- out=h.pipe(h.compiler.compile(p, []), max_new_tokens=64, do_sample=False, temperature=0.2)
	+ out=h.pipe(h.compiler.compile(p, []), max_new_tokens=64, do_sample=False, temperature=0.2) # type: ignore
	t1=time.time()
	text=out[0]["generated_text"]
	lat_ms.append((t1-t0)*1000)
	@@ -1551,7 +967,7 @@
	model_override=proposal.name
	elif proposal.kind=="code":
	target=os.path.basename(__file__); patched=os.path.join(sand.root,target)
	- open(patched,"w",encoding="utf-8").write(proposal.patch_text or "")
	+ with open(patched,"w",encoding="utf-8") as f: f.write(proposal.patch_text or "")
	code=f"import importlib.util, json; p=r'{patched}'; spec=importlib.util.spec_from_file_location('hmod',p); m=importlib.util.module_from_spec(spec); spec.loader.exec_module(m); h=m.Hive(); print(json.dumps({{'ok':True}}))"
	rc,out=sand.run_snippet(code)
	if rc!=0 or '"ok": true' not in out.lower(): return {"ok":False,"reason":"patch smoke test failed","out":out}
	@@ -1582,7 +998,7 @@
	if kind=="model":
	if not self._allowed_model(name): return False,"model not allowlisted"
	pref=os.path.join(OPT_DIR,"preferred_model.json"); json.dump({"model_id":name,"ts":time.time()}, open(pref,"w",encoding="utf-8"))
	- return True,"model preference recorded (takes effect after restart)"
	+ return True,"model preference recorded (takes effect after restart)" # type: ignore
	if kind=="code":
	if not CFG["OPT_AUTO_APPLY"]: return False,"awaiting Owner approval for code changes"
	try:
	@@ -1600,7 +1016,7 @@

	def _human_ts(ts: int) -> str:
	import datetime
	- try:
	+ try: # type: ignore
	return datetime.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S UTC")
	except Exception:
	return str(ts)
	@@ -1691,7 +1107,7 @@
	z.extractall(dest_dir)
	return True
	except Exception as e:
	- open(os.path.join(CFG.get("STATE_DIR","./state"), "restore_error.log"), "a", encoding="utf-8").write(f"extract: {e}\n")
	+ with open(os.path.join(CFG.get("STATE_DIR","./state"), "restore_error.log"), "a", encoding="utf-8") as f: f.write(f"extract: {e}\n")
	return False

	def _restore_from_local_archive(curve_dir: str):
	@@ -1711,7 +1127,7 @@
	try: os.remove(tmp)
	except: pass
	return (ok, "restored from URL" if ok else "URL extract failed")
	- except Exception as e:
	+ except Exception as e: # type: ignore
	open(os.path.join(CFG.get("STATE_DIR","./state"), "restore_error.log"), "a", encoding="utf-8").write(f"url: {e}\n")
	return False, "URL download error"

	@@ -1737,14 +1153,14 @@
	# auto-archive after each dataset if configured
	if CFG.get("HIVE_AUTO_ARCHIVE", True) and str(CFG.get("HIVE_AUTO_ARCHIVE_MODE","per_chain")).lower() == "per_dataset":
	try:
	- _ok_arc, _ap = _archive_memory(curve_dir)
	+ _ok_arc, _ap = _archive_memory(curve_dir) # type: ignore
	open(os.path.join(CFG["STATE_DIR"], "archive_status.log"), "a", encoding="utf-8").write(
	json.dumps({"ts": time.time(), "mode": "per_dataset", "ok": _ok_arc, "path": _ap}) + "\n"
	)
	except Exception as _e_arc:
	open(os.path.join(CFG["STATE_DIR"], "archive_error.log"), "a", encoding="utf-8").write(
	"per_dataset: " + str(_e_arc) + "\n"
	- )
	+ ) # type: ignore
	src = os.path.join(local_dir, sub) if sub else local_dir
	if os.path.isdir(src):
	for root, dirs, files in os.walk(src):
	@@ -1755,7 +1171,7 @@
	shutil.copy2(os.path.join(root, fn), os.path.join(dest_root, fn))
	return True, "restored from HF dataset snapshot"
	return False, "HF snapshot missing subpath"
	- except Exception as e:
	+ except Exception as e: # type: ignore
	open(os.path.join(CFG.get("STATE_DIR","./state"), "restore_error.log"), "a", encoding="utf-8").write(f"hf: {e}\n")
	return False, "HF restore error"

	@@ -1800,7 +1216,7 @@


	if not CFG.get("CURVES_AUTO_RESTORE", True):
	- return False, "auto-restore disabled"
	+ return False, "auto-restore disabled" # type: ignore
	if _curves_present(curve_dir):
	return True, "curves already present"
	ok, msg = _restore_from_local_archive(curve_dir)
	@@ -1842,7 +1258,7 @@
	# manifest update
	try:
	vecs = 0
	- try:
	+ try: # type: ignore
	vecs = CurveStore(curve_dir).index.ntotal
	except Exception:
	pass
	@@ -1871,7 +1287,7 @@
	# auto-archive after chain if configured
	if CFG.get("HIVE_AUTO_ARCHIVE", True) and str(CFG.get("HIVE_AUTO_ARCHIVE_MODE","per_chain")).lower() in ("per_chain","perdataset","per-dataset"):
	try:
	- _ok_arc, _ap = _archive_memory(curve_dir)
	+ _ok_arc, _ap = _archive_memory(curve_dir) # type: ignore
	open(os.path.join(CFG["STATE_DIR"], "archive_status.log"), "a", encoding="utf-8").write(json.dumps({"ts":time.time(),"mode":"per_chain","ok":_ok_arc,"path":_ap})+"\n")
	except Exception as _e_arc:
	open(os.path.join(CFG["STATE_DIR"], "archive_error.log"), "a", encoding="utf-8").write("per_chain: "+str(_e_arc)+"\n")
	@@ -1884,12 +1300,12 @@
	def __init__(self, model_id: Optional[str]=None, device: Optional[str]=None):
	# --- try restoring memory if missing (local archive / URL / HF dataset) ---
	try:
	- ok_restored, restore_msg = restore_curves_if_missing(CFG["CURVE_DIR"] if "CURVE_DIR" in CFG else CFG.get("MEMORY_DIR","./curves"))
	+ ok_restored, restore_msg = restore_curves_if_missing(CFG["CURVE_DIR"])
	open(os.path.join(CFG["STATE_DIR"], "restore_status.log"), "a", encoding="utf-8").write(json.dumps({"ok":bool(ok_restored),"msg":restore_msg,"ts":time.time()})+"\n")
	except Exception as e:
	open(os.path.join(CFG["STATE_DIR"], "restore_error.log"), "a", encoding="utf-8").write("restore: "+str(e)+"\n")
	# --- staged ingestion chaining (run next stages automatically if enabled) ---
	try:
	- _ing_chain = staged_ingest_chain_if_enabled(CFG["CURVE_DIR"] if "CURVE_DIR" in CFG else CFG.get("MEMORY_DIR","./curves"))
	+ _ing_chain = staged_ingest_chain_if_enabled(CFG["CURVE_DIR"])
	open(os.path.join(CFG["STATE_DIR"], "ingest_chain_status.log"), "a", encoding="utf-8").write(json.dumps({"ts":time.time(),"chain":_ing_chain})+"\n")
	except Exception as e:
	open(os.path.join(CFG["STATE_DIR"], "ingest_error.log"), "a", encoding="utf-8").write("chain: "+str(e)+"\n")
	@@ -1897,7 +1313,7 @@
	if ok_restored:
	try:
	if CurveStore(CFG["CURVE_DIR"] if "CURVE_DIR" in CFG else CFG.get("MEMORY_DIR","./curves")).index.ntotal > 0:
	- _mark_offline_ready()
	+ _mark_offline_ready() # type: ignore
	except Exception:
	pass
	except Exception as e:
	@@ -1926,7 +1342,7 @@
	kwargs.update(dict(torch_dtype=torch.float16, device_map="auto"))

	# --- Model / Tokenizer initialization (supports local transformers or Hugging Face Inference API) ---
	- use_remote = str(os.getenv("HIVE_USE_HF_INFERENCE","0")).strip() in ("1","true","yes")
	+ use_remote = CFG["HIVE_USE_HF_INFERENCE"]
	if use_remote:
	# Remote path using huggingface_hub.InferenceClient
	try:
	@@ -1934,7 +1350,7 @@
	except Exception as e:
	raise RuntimeError(f"HIVE_USE_HF_INFERENCE=1 but huggingface_hub is missing: {e}")
	endpoint = os.getenv("HIVE_HF_ENDPOINT","").strip() or None
	- token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or None
	+ token = CFG["HF_READ_TOKEN"] or os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or None
	self.client = InferenceClient(model=self.model_id if endpoint is None else None,
	token=token,
	timeout= int(os.getenv("HIVE_HF_TIMEOUT","60") or "60"),
	@@ -2043,7 +1459,7 @@
	# ----------- OCR helper -----------
	def ocr_text_from_image_bgr(image_bgr)->str:
	if not (_HAVE_CV and _HAVE_TESS): return ""
	- gray=cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
	+ gray=cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY) # type: ignore
	return pytesseract.image_to_string(gray) or ""

	# --------------- UI ---------------
	@@ -2115,7 +1531,7 @@
	text=asr_transcribe(path, uid, None if asr_lg=="auto" else asr_lg)
	prof=_load_json(ADAPT_DB,{}); p=prof.get(uid or "guest",{})
	dur=librosa.get_duration(filename=path) or 0.001
	- syl=len(re.findall(r"[aeiouyAEIOUY]+", text)); rate=(syl/dur)
	+ syl=len(re.findall(r"[aeiouyAEIOUY]+", text)); rate=(syl/dur) if dur > 0 else 0
	p["rate"]=0.8p.get("rate", rate)+0.2rate
	chunks=lid_chunk(text); en_len=sum(len(c) for c,l in chunks if l.startswith("en")); all_len=sum(len(c) for c,l in chunks)
	if all_len>0:
	@@ -2151,7 +1567,7 @@
	if notes: prompt += "\n\n(Phonics)\n" + "\n".join(f"- {n}" for n in notes[:10])
	prof=_load_json(ADAPT_DB,{}); p=prof.get(uid or "guest",{}); p["phonics_on"]=True; prof[uid or "guest"]=p; _save_json(ADAPT_DB,prof)
	ans=hive.chat(prompt, eff, caller_id=uid)
	- wav=synthesize_multilang(ans, CFG["TTS_LANG"]); return ans, wav
	+ wav=synthesize_multilang(ans, CFG["TTS_LANG"]); return ans, wav # type: ignore
	reply_btn.click(do_reply,[uid_state, role_state, mode_state, transcript, phonics_toggle],[reply_text, reply_audio])

	with gr.Accordion("Voice enrollment (add your voiceprint)", open=False):
	@@ -2267,7 +1683,7 @@
	if not res.get("ok"): return f"Test failed: {res.get('reason','unknown')}"
	if _last["obj"].kind=="code" and role!="owner" and not CFG["OPT_AUTO_APPLY"]: return "Awaiting Owner approval for code changes."
	ok,msg=hive.changes.apply(res); return msg if ok else f"Apply failed: {msg}"
	- propose_btn.click(do_propose, [prop_kind,prop_name,prop_ver,prop_reason,prop_patch],[opt_out])
	+ propose_btn.click(do_propose, [prop_kind,prop_name,prop_ver,prop_reason,prop_patch],[opt_out]) # type: ignore
	test_btn.click(lambda: do_test(), [], [opt_out])
	apply_btn.click(do_apply, [role_state, mode_state], [opt_out])

	@@ -2285,1006 +1701,4 @@
	if not s: continue
	print(h.chat(s, effective_role="user", caller_id=None))
	except KeyboardInterrupt:
	- pass
	-
	-
	-hive_base = types.ModuleType("hive_base")
	-hive_base.__dict__["__name__"] = "hive_base"
	-exec(compile(_HIVE_BASE_SOURCE, "hive_tinyllama_hf.py", "exec"), hive_base.__dict__)
	-sys.modules["hive_base"] = hive_base
	-
	-# ---------- Compatibility Monkey Patch for Hive.chat ----------
	-# Some UI callbacks may call Hive.chat(...) without the required keyword-only
	-# args added in newer versions. Patch it so defaults are injected, preventing
	-# errors like: Hive.chat() missing 'effective_role' and 'caller_id'.
	-try:
	- DEFAULT_EFFECTIVE_ROLE = os.getenv("EFFECTIVE_ROLE", "user")
	- DEFAULT_CALLER_ID_CLI = os.getenv("CALLER_ID_CLI", "cli")
	- DEFAULT_CALLER_ID_UI = os.getenv("CALLER_ID_UI", "ui")
	-
	- if hasattr(hive_base, "Hive"):
	- _Hive = hive_base.Hive
	- if hasattr(_Hive, "chat"):
	- _orig_chat = _Hive.chat
	-
	- def _chat_compat(self, args, *kwargs):
	- # Inject defaults only if missing
	- if "effective_role" not in kwargs or kwargs.get("effective_role") is None:
	- kwargs["effective_role"] = DEFAULT_EFFECTIVE_ROLE
	- if "caller_id" not in kwargs or kwargs.get("caller_id") is None:
	- # Prefer UI caller id if we are in the web app
	- if os.getenv("RUNNING_IN_UI", "0") == "1":
	- kwargs["caller_id"] = DEFAULT_CALLER_ID_UI
	- else:
	- kwargs["caller_id"] = DEFAULT_CALLER_ID_CLI
	- return _orig_chat(self, args, *kwargs)
	-
	- # Tag to avoid double-patching
	- if getattr(_Hive.chat, "_compat_patched", False) is False:
	- _chat_compat._compat_patched = True
	- _Hive.chat = _chat_compat
	-except Exception as _e:
	- # Do not fail the app if patching isn't possible
	- pass
	-# -------------------------------------------------------------
	-
	-
	-# ---------- Tutor+ Layer & Curves builder (no UI) ----------
	-try:
	- import numpy as np
	-except Exception:
	- np = None
	-
	-try:
	- import faiss
	- _FAISS = True
	-except Exception:
	- faiss = None; _FAISS = False
	-
	-try:
	- from sentence_transformers import SentenceTransformer
	-except Exception:
	- SentenceTransformer = None
	-
	-def ENV(k, d=None, cast=str):
	- v = os.getenv(k, None)
	- if v is None: return d
	- if cast is bool: return str(v).lower() in ("1","true","yes","on")
	- if cast is int:
	- try: return int(v)
	- except: return int(float(v))
	- return v
	-
	-CFG = {
	- "CURVE_DIR": ENV("HIVE_CURVE_DIR","./state/curves"),
	- "EMB_ID": ENV("HIVE_EMB_ID","sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
	- "EMB_LOCAL_DIR": ENV("HIVE_EMB_LOCAL_DIR",""),
	- "KNN_TOPK": ENV("HIVE_KNN_TOPK","8", int),
	- "KNN_LAMBDA": float(ENV("HIVE_KNN_LAMBDA","0.35")),
	- "PHONICS_MAX_LINES": ENV("HIVE_PHONICS_MAX_LINES","2", int),
	- "STAGE_BATCH": ENV("HIVE_STAGE_BATCH","128", int),
	- "STAGE_SAVE_EVERY": ENV("HIVE_STAGE_SAVE_EVERY","512", int),
	- "STAGE_MAX_DOCS_PER_DATASET": ENV("HIVE_STAGE_MAX_DOCS_PER_DATASET","5000", int),
	- "CURVES_TARGET_MIN": ENV("HIVE_CURVES_TARGET_MIN","10000", int),
	- "CURVES_RECHECK_SECS": ENV


	# Materialise the embedded base implementation (_HIVE_BASE_SOURCE) as a real
	# module object and register it so `import hive_base` resolves to it.
	hive_base = types.ModuleType("hive_base")
	setattr(hive_base, "__name__", "hive_base")
	exec(compile(_HIVE_BASE_SOURCE, "hive_tinyllama_hf.py", "exec"), vars(hive_base))
	sys.modules["hive_base"] = hive_base

	# ---------- Compatibility Monkey Patch for Hive.chat ----------
	# Some UI callbacks may call Hive.chat(...) without the required keyword-only
	# args added in newer versions. Patch it so defaults are injected, preventing
	# errors like: Hive.chat() missing 'effective_role' and 'caller_id'.
	try:
	DEFAULT_EFFECTIVE_ROLE = os.getenv("EFFECTIVE_ROLE", "user")
	DEFAULT_CALLER_ID_CLI = os.getenv("CALLER_ID_CLI", "cli")
	DEFAULT_CALLER_ID_UI = os.getenv("CALLER_ID_UI", "ui")

	if hasattr(hive_base, "Hive"):
	_Hive = hive_base.Hive
	if hasattr(_Hive, "chat"):
	_orig_chat = _Hive.chat

	def _chat_compat(self, args, *kwargs):
	# Inject defaults only if missing
	if "effective_role" not in kwargs or kwargs.get("effective_role") is None:
	kwargs["effective_role"] = DEFAULT_EFFECTIVE_ROLE
	if "caller_id" not in kwargs or kwargs.get("caller_id") is None:
	# Prefer UI caller id if we are in the web app
	if os.getenv("RUNNING_IN_UI", "0") == "1":
	kwargs["caller_id"] = DEFAULT_CALLER_ID_UI
	else:
	kwargs["caller_id"] = DEFAULT_CALLER_ID_CLI
	return _orig_chat(self, args, *kwargs)

	# Tag to avoid double-patching
	if getattr(_Hive.chat, "_compat_patched", False) is False:
	_chat_compat._compat_patched = True
	_Hive.chat = _chat_compat
	except Exception as _e:
	# Do not fail the app if patching isn't possible
	pass
	# -------------------------------------------------------------


	# ---------- Tutor+ Layer & Curves builder (no UI) ----------
	try:
	import numpy as np
	except Exception:
	np = None

	try:
	import faiss
	_FAISS = True
	except Exception:
	faiss = None; _FAISS = False

	try:
	from sentence_transformers import SentenceTransformer
	except Exception:
	SentenceTransformer = None

	def ENV(k, d=None, cast=str):
	    """Read environment variable *k* and coerce it according to *cast*.

	    Args:
	        k: environment variable name.
	        d: default used when the variable is unset or cannot be parsed.
	        cast: ``bool`` ("1"/"true"/"yes"/"on", case-insensitive, are True),
	            ``int`` (accepts float-looking text such as "12.7" -> 12), or
	            anything else to return the raw string.

	    Returns:
	        The coerced value. The default goes through the same coercion, so
	        ``ENV("X", "8", int)`` yields ``8`` rather than ``"8"``; a ``None``
	        default is returned as ``None``.
	    """
	    def _coerce(raw):
	        if cast is bool:
	            return str(raw).lower() in ("1", "true", "yes", "on")
	        if cast is int:
	            try:
	                return int(raw)
	            except (ValueError, TypeError):
	                return int(float(raw))
	        return raw

	    def _default():
	        if d is None:
	            return None
	        try:
	            return _coerce(d)
	        except (ValueError, TypeError, OverflowError):
	            return d

	    v = os.getenv(k, None)
	    if v is None:
	        return _default()
	    try:
	        return _coerce(v)
	    except (ValueError, TypeError, OverflowError):
	        # A malformed value should not crash module import; use the default.
	        return _default()

	# Settings for the Tutor+ layer and curves builder. Each entry is read from
	# the named HIVE_* environment variable via ENV(), with the literal shown as
	# the fallback. Consumers in this file wrap the numeric entries in int()
	# before using them.
	CFG = {
	# Directory holding the FAISS index and metadata file
	"CURVE_DIR": ENV("HIVE_CURVE_DIR","./state/curves"),
	# Embedding model id, and an optional local directory that overrides it
	"EMB_ID": ENV("HIVE_EMB_ID","sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"),
	"EMB_LOCAL_DIR": ENV("HIVE_EMB_LOCAL_DIR",""),
	"KNN_TOPK": ENV("HIVE_KNN_TOPK","8", int),
	"KNN_LAMBDA": float(ENV("HIVE_KNN_LAMBDA","0.35")),
	# Maximum number of phonics hint lines appended to a reply
	"PHONICS_MAX_LINES": ENV("HIVE_PHONICS_MAX_LINES","2", int),
	# Staged ingestion: batch size, checkpoint interval, per-dataset document cap
	"STAGE_BATCH": ENV("HIVE_STAGE_BATCH","128", int),
	"STAGE_SAVE_EVERY": ENV("HIVE_STAGE_SAVE_EVERY","512", int),
	"STAGE_MAX_DOCS_PER_DATASET": ENV("HIVE_STAGE_MAX_DOCS_PER_DATASET","5000", int),
	# Background supervisor: minimum index size, and seconds between re-checks
	"CURVES_TARGET_MIN": ENV("HIVE_CURVES_TARGET_MIN","10000", int),
	"CURVES_RECHECK_SECS": ENV("HIVE_CURVES_RECHECK_SECS","1800", int),
	}

	# Create the curve directory up front; any failure here is deliberately ignored.
	try: os.makedirs(CFG["CURVE_DIR"], exist_ok=True)
	except Exception: pass

	class _EmbMux:
	def __init__(self):
	if SentenceTransformer is None:
	self.model=None; self.dim=384
	else:
	p=CFG["EMB_LOCAL_DIR"].strip()
	self.model = SentenceTransformer(p) if (p and os.path.isdir(p)) else SentenceTransformer(CFG["EMB_ID"])
	try:
	v=self.model.encode(["hi"], normalize_embeddings=True)
	self.dim=int(getattr(v,"shape",[1,384])[1])
	except Exception:
	self.dim=384
	def encode(self, texts: List[str]):
	if self.model is None:
	dim=getattr(self,"dim",384); out=[]
	for t in texts:
	h=abs(hash(t))%1000003; vec=[0.0]*dim; vec[h%dim]=1.0; out.append(vec)
	return out
	return self.model.encode(texts, normalize_embeddings=True)

	class _CurveStore:
	def __init__(self, root:str):
	self.root=root
	try: os.makedirs(self.root, exist_ok=True)
	except Exception: pass
	self.emb=_EmbMux()
	self.dim=getattr(self.emb,"dim",384)
	self.idx_path=os.path.join(self.root,"faiss.index")
	self.meta_path=os.path.join(self.root,"meta.jsonl")
	self.idx=self._load()
	def _load(self):
	if not _FAISS or faiss is None: return None
	if os.path.exists(self.idx_path):
	try: return faiss.read_index(self.idx_path)
	except Exception: pass
	return faiss.IndexFlatIP(self.dim)
	def save_index(self):
	if _FAISS and self.idx is not None:
	try: faiss.write_index(self.idx, self.idx_path)
	except Exception: pass
	def add_vectors(self, vecs, metas: List[Dict]):
	if not _FAISS or self.idx is None or np is None: return 0
	try:
	v=np.asarray(vecs, dtype="float32")
	self.idx.add(v)
	with open(self.meta_path,"a",encoding="utf-8") as f:
	for m in metas: f.write(json.dumps(m, ensure_ascii=False)+"\n")
	return len(metas)
	except Exception:
	return 0
	def add_texts(self, texts: List[str], tag="stage", scope="general"):
	if not texts: return 0
	vecs=self.emb.encode(texts)
	metas=[{"scope":scope, "tag":tag, "text":t[:500]} for t in texts]
	n=self.add_vectors(vecs, metas); self.save_index(); return n
	@property
	def count(self)->int:
	try:
	return int(self.idx.ntotal) if (self.idx is not None) else 0
	except Exception:
	return 0

	def _extract_text(rec: dict):
	for k in ("text","sentence","sentences","content","input","inputs","prompt","source","article","document","review","body"):
	if k in rec and isinstance(rec[k], str) and rec[k].strip():
	return rec[k]
	if k in rec and isinstance(rec[k], list) and rec[k] and isinstance(rec[k][0], str):
	return " ".join(rec[k])
	parts=[]
	for k,v in rec.items():
	if isinstance(v,str) and 5<=len(v)<=2000: parts.append(v)
	return " ".join(parts) if parts else ""

	def _iter_texts(name: str, max_docs:int):
	    """Yield text strings from dataset *name* across train/validation/test.

	    Each split is first loaded in streaming mode; if that raises, a regular
	    load is attempted, and if that also raises the split is skipped. Records
	    that fail extraction are skipped. The generator ends entirely as soon as
	    one pass has yielded *max_docs* texts (the counter restarts for every
	    split and every load attempt).
	    """
	    # un-stub datasets
	    if "datasets" in sys.modules:
	        loader = sys.modules["datasets"].load_dataset
	        if getattr(loader, "__name__", "") == "_disabled_load_dataset":
	            del sys.modules["datasets"]
	    import datasets as _ds
	    for split in ("train", "validation", "test"):
	        # prefer streaming, then fall back to a regular load
	        for extra in ({"streaming": True}, {}):
	            try:
	                ds = _ds.load_dataset(name, split=split, **extra)
	                yielded = 0
	                for rec in ds:
	                    try:
	                        txt = _extract_text(rec)
	                        if txt:
	                            yield txt
	                            yielded += 1
	                            if yielded >= max_docs:
	                                return
	                    except Exception:
	                        continue
	            except Exception:
	                continue  # this strategy failed; try the next one
	            break  # strategy completed without raising; move to the next split

	def build_condensed_curves(datasets_csv=None, curve_dir=None, log_cb=None):
	"""Embed texts from the listed datasets into the curve store; returns True.

	datasets_csv: comma-separated dataset names; when falsy, the
	HIVE_DATASETS_LIST environment variable is used (default "wi_locness").
	curve_dir: store directory; defaults to CFG["CURVE_DIR"].
	log_cb: optional callable that receives progress messages.

	NOTE(review): indentation was lost in this copy of the file, so the exact
	nesting of the checkpoint/progress lines below should be confirmed against
	the original before restructuring.
	"""
	def log(m):
	if log_cb: log_cb(m)
	# enforce_cache_budget is defined elsewhere in this file; it is called with
	# the logger before the build, at checkpoints, and after each dataset.
	enforce_cache_budget(log)
	curve_dir = curve_dir or CFG["CURVE_DIR"]
	# Dataset names: explicit argument, else $HIVE_DATASETS_LIST, else "wi_locness"
	names = [x.strip() for x in (datasets_csv or os.getenv("HIVE_DATASETS_LIST","wi_locness")).split(",") if x.strip()]
	max_docs = int(CFG["STAGE_MAX_DOCS_PER_DATASET"])
	batch = int(CFG["STAGE_BATCH"])
	save_every = int(CFG["STAGE_SAVE_EVERY"])
	store = _CurveStore(curve_dir)
	total_added=0
	for name in names:
	log(f"[staged] building condensed curves from '{name}' (max {max_docs} docs)…")
	buf=[]; added=0; last_save=0
	for txt in _iter_texts(name, max_docs):
	buf.append(txt)
	# Flush the buffer into the store once it reaches the batch size
	if len(buf) >= batch:
	n = store.add_texts(buf, tag=f"ds:{name}", scope="general")
	added += n; total_added += n; buf = []
	# Checkpoint once at least save_every items were added since the last one
	if added - last_save >= save_every:
	store.save_index(); enforce_cache_budget(log); last_save = added
	log(f"[staged] progress '{name}': {added} items … (index={store.count})")
	# Flush whatever is left over for this dataset
	if buf:
	n = store.add_texts(buf, tag=f"ds:{name}", scope="general")
	added += n; total_added += n; buf = []
	store.save_index(); enforce_cache_budget(log)
	log(f"[staged] '{name}' done: {added} items condensed (index={store.count}).")
	log(f"[staged] total added: {total_added} (index={store.count})")
	return True

	# Background supervisor: runs at start and on interval; only builds if below target
	def _should_build(curve_dir=None) -> bool:
	    """Return True while the store holds fewer vectors than CFG["CURVES_TARGET_MIN"]."""
	    location = curve_dir or CFG["CURVE_DIR"]
	    current = _CurveStore(location).count
	    target = int(CFG["CURVES_TARGET_MIN"])
	    return current < target

	def _background_supervisor(log_cb=None):
	    """Loop forever: build curves when below target, then sleep and re-check.

	    Messages go to *log_cb* when one is given. Exceptions raised by a check or
	    build are reported through the logger and the loop continues. The pause
	    between rounds is CFG["CURVES_RECHECK_SECS"] seconds.
	    """
	    def emit(message):
	        if log_cb:
	            log_cb(message)

	    pause = int(CFG["CURVES_RECHECK_SECS"])
	    while True:
	        try:
	            if not _should_build():
	                emit("[staged] target met; no build needed.")
	            else:
	                emit("[staged] target not met; starting condensed-curves build…")
	                build_condensed_curves(log_cb=emit)
	        except Exception as err:
	            emit(f"[staged] supervisor error: {err}")
	        time.sleep(pause)

	def _kickoff_background_if_enabled(log_cb=None):
	if os.getenv("HIVE_BUILD_CONDENSED_CURVES_ON_START","0").lower() not in ("1","true","yes","on"):
	return
	threading.Thread(target=_background_supervisor, args=(log_cb,), daemon=True).start()

	# ---------- Tutor+ light additions (no UI) ----------
	def _ipa_or_hyphenate(text:str)->str:
	try:
	import eng_to_ipa as ipa
	ipa_text=ipa.convert(text)
	if ipa_text and ipa_text!=text:
	return f"{text} /{ipa_text}/"
	except Exception:
	pass
	try:
	import pyphen
	dic=pyphen.Pyphen(lang="en"); return dic.inserted(text)
	except Exception:
	return text

	def _gentle_phonics_block(text:str, max_lines:int)->str:
	import re as _re
	words=_re.findall(r"[A-Za-z][A-Za-z\-']{2,}", text or "")
	words=sorted(set(words), key=lambda w:(-len(w), w.lower()))
	picks=words[:max_lines]
	if not picks: return ""
	return "\n".join([f"- {_ipa_or_hyphenate(w)}" for w in picks])

	def _route_intent(txt:str)->str:
	import re as _re
	if _re.search(r"\b(spell\|spelling\|how\s+do\s+you\s+spell)\b", txt or "", _re.I): return "direct_spell"
	if _re.search(r"\b(pronounc(e\|iation)\|ipa\|phonics\|how\s+do\s+you\s+say)\b", txt or "", _re.I): return "pronounce"
	if _re.search(r"\b(essay\|review\|evaluate\|feedback\|improv(e\|ements?)\|revise\|critique\|proofread\s+my\s+essay)\b", txt or "", _re.I): return "essay_review"
	if _re.search(r"\b(grammar\|correct\|fix\|proofread\|mistakes?)\b", txt or "", _re.I): return "direct_grammar"
	return "tutor"

	BaseHive = getattr(hive_base, "Hive", object)
	class Hive(BaseHive):
	    """Tutor+ wrapper around the base Hive that appends brief phonics hints.

	    All constructor and chat arguments are forwarded unchanged to the base
	    class, so ``Hive()`` and ``hive.chat(message)`` work without extras.
	    """

	    def __init__(self, *a, **k):
	        super().__init__(*a, **k)

	    def chat(self, message:str, *a, **k)->str:
	        """Return the base reply, optionally followed by phonics hints.

	        Hints are added when the reply is a string and the message intent is
	        "pronounce" or "direct_spell", or the intent is "tutor" and the reply
	        has at most 40 words. A failure in the base chat is turned into a
	        "[Base chat failed: ...]" reply instead of being raised.
	        """
	        mode = _route_intent(message or "")
	        try:
	            reply = super().chat(message, *a, **k)
	        except Exception as e:
	            reply = f"[Base chat failed: {e}]"
	        if isinstance(reply, str) and (mode in ("pronounce", "direct_spell") or (mode == "tutor" and len(reply.split()) <= 40)):
	            hints = _gentle_phonics_block(reply, int(CFG["PHONICS_MAX_LINES"]))
	            if hints:
	                reply += "\n\nPhonics hints (brief)\n" + hints
	        return reply

	# ---------- Entrypoint ----------
	def build_ui():
	    """Return the base module's own Gradio UI, if it exposes one.

	    Looks for the first of build_ui / launch_ui / get_ui / make_ui on
	    hive_base and calls it; the result is returned only when it is a
	    gr.Blocks. Returns None when gradio is missing, no factory exists, or
	    every factory raises. No tabs are added here.
	    NOTE: a later definition of build_ui in this file replaces this one.
	    """
	    try:
	        import gradio as gr
	    except Exception:
	        return None
	    factories = ("build_ui", "launch_ui", "get_ui", "make_ui")
	    for attr in factories:
	        if not hasattr(hive_base, attr):
	            continue
	        try:
	            candidate = getattr(hive_base, attr)()
	            if isinstance(candidate, gr.Blocks):
	                return candidate
	            return None
	        except Exception:
	            pass
	    return None

	# --- Entrypoint helpers (CLI loop, headless wait, Gradio UI). This section replaces any earlier REPL / `if __name__ == "__main__":` block. ---
	import os
	import sys
	import time
	import argparse

	def _read_line(prompt="> "):
	    """Read one line from stdin; return None on end-of-input.

	    The prompt is suppressed when there is no interactive terminal
	    (e.g., Spaces), so nothing is echoed into non-TTY output.
	    """
	    interactive = sys.stdin and sys.stdin.isatty()
	    shown = prompt if interactive else ""
	    try:
	        return input(shown)
	    except EOFError:
	        return None

	def handle_user_input(s: str) -> str:
	    """Send *s* to the shared Hive instance, creating it on first use."""
	    global _HIVE_SINGLETON
	    hive = _HIVE_SINGLETON
	    if hive is None:
	        # Lazy construction keeps the backend from being built at import time.
	        hive = Hive()
	        _HIVE_SINGLETON = hive
	    return hive.chat(s)

	def run_cli_loop():
	    """Read-eval-print loop: chat on each non-blank line until stdin ends."""
	    # _read_line returns None when there is no stdin / a non-interactive
	    # environment; iter() stops there so the caller can fall through.
	    for raw in iter(lambda: _read_line("> "), None):
	        text = raw.strip()
	        if text:
	            print(handle_user_input(text), flush=True)

	def run_headless_wait():
	    """Announce readiness, then block forever by sleeping in one-hour steps."""
	    nap_seconds = 3600
	    print("APP_READY: initialized (headless). Waiting for requests...", flush=True)
	    while True:
	        time.sleep(nap_seconds)

	_HIVE_SINGLETON = None # global lazy instance for CLI & UI

	def build_ui():
	try:
	import gradio as gr
	except Exception as e:
	print(f"[ui] Gradio import failed: {e}")
	return None

	with gr.Blocks() as demo:
	state = gr.State(None) # we'll lazily create Hive on first use
	chatbox = gr.Chatbot(height=400)
	msg = gr.Textbox(placeholder="Type your message…", label="Message")
	send = gr.Button("Send")

	def on_send(user_msg, st, history):
	global _HIVE_SINGLETON
	if _HIVE_SINGLETON is None:
	_HIVE_SINGLETON = Hive() # <- construct here, not at import time
	if not user_msg.strip():
	return history, ""
	reply = _HIVE_SINGLETON.chat(user_msg)
	history = (history or []) + [[user_msg, reply]]
	return history, ""

	send.click(on_send, inputs=[msg, state, chatbox], outputs=[chatbox, msg])

	return demo

	# Script entry point: chooses between the Gradio web UI and the terminal CLI.
	# NOTE(review): indentation was lost in this copy of the file; confirm the
	# nesting of the branches below against the original before restructuring.
	if __name__ == "__main__":
	# os, sys and argparse are already imported at module level just above.
	import os, sys, argparse

	parser = argparse.ArgumentParser()
	parser.add_argument("--ui", action="store_true", help="Force-launch Gradio UI")
	args = parser.parse_args()

	# Detect headless container (no interactive stdin/TTY)
	HEADLESS = (not sys.stdin) or (not sys.stdin.isatty())

	# Decide whether to bring up the web UI:
	# - if user passed --ui
	# - OR if we're headless (no stdin)
	# - OR if FORCE_UI env var is set (FORCE_UI=1/true/yes)
	force_env = os.getenv("FORCE_UI", "").lower() in ("1", "true", "yes")
	WANTS_UI = args.ui or HEADLESS or force_env

	if WANTS_UI:
	ui = build_ui()
	if ui is None:
	# build_ui() returns None when gradio could not be imported
	print("Gradio not installed; falling back to CLI.")
	run_cli_loop()
	if HEADLESS:
	run_headless_wait()
	else:
	# Start any background tasks after app is live (ignore if not defined)
	try:
	_kickoff_background_if_enabled(log_cb=print)
	except NameError:
	pass

	# Respect platform port if provided
	port = int(os.getenv("PORT", "7860"))
	# RUNNING_IN_UI=1 is what the Hive.chat compatibility wrapper checks to pick the UI caller id
	os.environ.setdefault("RUNNING_IN_UI","1"); ui.queue().launch(server_name="0.0.0.0", server_port=port)
	else:
	# CLI mode when a TTY exists
	print("Hive (Original-first, Tutor+ • SAFE v5 BACKGROUND) ready. Type to chat.")
	try:
	_kickoff_background_if_enabled(log_cb=print)
	except NameError:
	pass
	run_cli_loop()

	# If the CLI loop ended because stdin vanished, keep app alive
	# NOTE(review): HEADLESS is computed once at startup; whether this check sits
	# inside the CLI branch or after the whole if/else cannot be told from this copy.
	if HEADLESS:
	run_headless_wait()