ModuleMind

Running on Zero

App Files Files Community

ModuleMind / agents /modmind /specialist_presets.py

Quazim0t0

Add files using upload-large-folder tool

45e7dfb verified 11 days ago

Raw

History Blame Contribute Delete

2.71 kB

	"""
	specialist_presets.py -- ModularMind-on-V2 specialist sizing.

	DENSE ~80M specialists (Supra-50M-style, scaled up): a dense Llama-ish transformer with
	NO MoE / Engram / Hyper-Connections / HRM, so the parameters go into language modeling
	instead of machinery -> coherent generation (the lesson from SupraLabs/Supra-50M-Base,
	a dense model that produces coherent multi-paragraph text on FineWeb-Edu).

	Shape (shared across domains; only the vocab differs, read from registry.py):
	hidden 640, 16 layers, 10 heads / 5 KV (GQA), dense FFN 1728, ctx 1024, d_latent 256.
	-> ~81.5M params at vocab 16384 (the shared length-max tokenizer).

	The bridge bus (d_latent=256) and latent IO are kept, so train_link.py / the Gradio
	adapter still work after retraining.
	"""
	from config import SpikeWhaleConfig
	from registry import spec


	def _dense_80m(vocab_size: int) -> SpikeWhaleConfig:
	    """Build the shared dense ~80M specialist config for one vocabulary size.

	    Every field except ``vocab_size`` is fixed here, so all configs produced
	    by this helper share a single shape.

	    Args:
	        vocab_size: Forwarded unchanged as the config's ``vocab_size``.

	    Returns:
	        A ``SpikeWhaleConfig`` with ``use_moe``, ``use_engram``,
	        ``use_hyper_connections``, ``use_hrm_refine`` and ``use_derf`` off,
	        ``num_nextn_predict_layers=0``, and ``use_xsa`` / ``use_latent_io`` on.
	    """
	    # Transformer backbone: 16 layers, 10 attention heads over 5 KV heads (GQA).
	    backbone = dict(
	        vocab_size=vocab_size,
	        hidden_size=640,
	        num_hidden_layers=16,
	        num_attention_heads=10,
	        num_key_value_heads=5,
	        head_dim=64,
	        qk_rope_head_dim=16,
	        q_lora_rank=320,
	        o_lora_rank=160,
	        tie_word_embeddings=True,
	    )
	    # DENSE: MoE is off; moe_intermediate_size still sizes the DenseFFN (model.py).
	    feed_forward = dict(use_moe=False, moe_intermediate_size=1728)
	    # Heavy extras stripped so the parameters go to the LM, not machinery.
	    extras = dict(
	        use_engram=False,
	        use_hyper_connections=False,
	        hc_mult=1,
	        use_hrm_refine=False,
	        num_nextn_predict_layers=0,
	        use_derf=False,
	        use_xsa=True,
	    )
	    # ModularMind bridge bus kept so train_link.py / the adapter still work.
	    bridge = dict(use_latent_io=True, d_latent=256)
	    # Identical context settings for every specialist; base_context MUST be
	    # >= the training --seq-len.
	    # NOTE(review): the module docstring quotes ctx 1024 while base_context is
	    # 4096 here -- confirm which one is intended.
	    context = dict(chain_position=0, base_context=4096, base_rope_theta=10000.0)

	    # Groups are unpacked in the same order the keywords were originally listed.
	    return SpikeWhaleConfig(**backbone, **feed_forward, **extras, **bridge, **context)


	def specialist_config(domain: str = "language", position: int = 0) -> SpikeWhaleConfig:
	    """Return the dense ~80M config for a registered domain.

	    The vocabulary size is read as ``spec(domain)["vocab"]`` so registry.py
	    remains the single source of truth. ``position`` is accepted but is not
	    used by this function.
	    """
	    domain_vocab = spec(domain)["vocab"]
	    return _dense_80m(domain_vocab)


	def generic_specialist_config(vocab_size: int, position: int = 0) -> SpikeWhaleConfig:
	    """Return the dense ~80M config for an arbitrary vocabulary size.

	    Skips the registry lookup, so a new domain can reuse the shared shape by
	    passing its vocab size directly. ``position`` is accepted but is not used
	    by this function.
	    """
	    config = _dense_80m(vocab_size)
	    return config


	# Foundation chain ordering: maps each registry entry's "position" to its
	# domain name, so the mapping grows automatically with the registry.
	try:
	    from registry import SPECIALISTS as _REG
	    FOUNDATION_ORDER = dict(
	        (entry["position"], name) for name, entry in _REG.items()
	    )
	except Exception:
	    # Best-effort fallback when the registry cannot supply the ordering.
	    FOUNDATION_ORDER = {
	        0: "language",
	        1: "reasoning",
	        2: "tool_use",
	    }