Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-8b-remote-handoff /bundle /evaluation /tensor_layer_planner.py

bbkdevops

about 1 month ago

download

raw

10.6 kB

	"""Tensor-layer planning for very deep virtual TinyMind stacks."""

	from __future__ import annotations

	from datetime import datetime, timezone
	import json
	import math
	from pathlib import Path


	def choose_stable_tensor_layer_plan(
	    *,
	    target_layers_tensor: int = 1803,
	    min_physical_layers: int = 32,
	    max_physical_layers: int = 48,
	    hidden_dim: int = 5120,
	) -> dict:
	    """Return the best-ranked physical layer count for a virtual tensor depth.

	    Every physical layer count in ``[min_physical_layers,
	    max_physical_layers]`` is scored. The winner is the candidate that ranks
	    first by, in order: no masked lanes (exact divisibility), lowest
	    ``risk_score``, smallest distance from the preferred count of 41, and
	    finally the smaller physical layer count.

	    ``risk_score`` is a weighted sum of four terms:

	    * 0.45 x fraction of allocated lanes that are masked,
	    * 0.25 x relative distance of the physical count from 41,
	    * 0.20 x amount by which ``depth / 64`` exceeds 1,
	    * 0.10 x amount by which ``physical * hidden_dim / (48 * 5120)``
	      exceeds 1.

	    Args:
	        target_layers_tensor: Number of active virtual layers to represent.
	        min_physical_layers: Smallest physical layer count to consider.
	        max_physical_layers: Largest physical layer count to consider.
	        hidden_dim: Hidden size used in the memory-pressure term.

	    Returns:
	        A dict with keys ``physical_layers``,
	        ``tensor_depth_per_physical_layer``, ``allocated_layers_tensor``,
	        ``active_layers_tensor``, ``masked_layers_tensor``,
	        ``active_tensor_ratio``, ``divisibility_passed``, ``risk_score`` and
	        ``stability_score`` (``1 - risk_score`` clamped at 0).

	    Raises:
	        ValueError: If ``target_layers_tensor`` is not positive or the
	            physical layer range is empty or non-positive.
	    """
	    if target_layers_tensor <= 0:
	        raise ValueError("target_layers_tensor must be positive")
	    if min_physical_layers <= 0 or max_physical_layers < min_physical_layers:
	        raise ValueError("invalid physical layer search range")

	    preferred_physical = 41

	    def _score(physical: int) -> dict:
	        # Depth is rounded up, so the surplus lanes are the masked ones.
	        depth = math.ceil(target_layers_tensor / physical)
	        allocated = physical * depth
	        masked = allocated - target_layers_tensor
	        active_ratio = target_layers_tensor / allocated
	        physical_distance = abs(physical - preferred_physical) / preferred_physical
	        depth_pressure = depth / 64.0
	        memory_pressure = (physical * hidden_dim) / (48 * 5120)
	        # Pressure terms only contribute once they exceed their 1.0 budget.
	        risk_score = (
	            0.45 * (1.0 - active_ratio)
	            + 0.25 * physical_distance
	            + 0.20 * max(0.0, depth_pressure - 1.0)
	            + 0.10 * max(0.0, memory_pressure - 1.0)
	        )
	        return {
	            "physical_layers": physical,
	            "tensor_depth_per_physical_layer": depth,
	            "allocated_layers_tensor": allocated,
	            "active_layers_tensor": target_layers_tensor,
	            "masked_layers_tensor": masked,
	            "active_tensor_ratio": active_ratio,
	            "divisibility_passed": masked == 0,
	            "risk_score": risk_score,
	            "stability_score": max(0.0, 1.0 - risk_score),
	        }

	    # The key ends with the unique physical layer count, so the minimum is
	    # unambiguous and matches the first element of a full sort.
	    return min(
	        (_score(physical) for physical in range(min_physical_layers, max_physical_layers + 1)),
	        key=lambda row: (
	            row["masked_layers_tensor"] != 0,
	            row["risk_score"],
	            abs(row["physical_layers"] - preferred_physical),
	            row["physical_layers"],
	        ),
	    )


	def optimize_tensors_per_layer(
	    *,
	    target_layers_tensor: int = 1803,
	    min_tensors_per_layer: int = 24,
	    max_tensors_per_layer: int = 64,
	    hidden_dim: int = 5120,
	    max_physical_layers: int = 48,
	) -> dict:
	    """Search tensors-per-layer values and rank them by sharpness.

	    For each value in ``[min_tensors_per_layer, max_tensors_per_layer]`` the
	    physical layer count is the target divided by that value, rounded up.
	    Candidates are ordered by: planning-safe first, highest
	    ``sharpness_score``, fewest masked lanes, closeness to the preferred
	    depth of 44, then the smaller tensors-per-layer value.

	    ``risk_score`` is a weighted sum: 0.50 x masked lanes relative to the
	    target, 0.18 x relative distance from depth 44, 0.16 x relative distance
	    from 41 physical layers, 0.10 x memory pressure above 1, and 0.06 x
	    relative overshoot of ``max_physical_layers``.

	    Args:
	        target_layers_tensor: Number of active virtual layers to represent.
	        min_tensors_per_layer: Smallest tensors-per-layer value to try.
	        max_tensors_per_layer: Largest tensors-per-layer value to try.
	        hidden_dim: Hidden size used for memory pressure and the safety flag.
	        max_physical_layers: Physical layer budget used for pressure terms
	            and the safety flag.

	    Returns:
	        A dict with ``target_layers_tensor``, ``search_range``, ``best`` (the
	        top-ranked candidate) and ``top_candidates`` (up to ten candidates in
	        rank order).

	    Raises:
	        ValueError: If the tensors-per-layer range is invalid, or if
	            ``target_layers_tensor`` or ``max_physical_layers`` is not
	            positive.
	    """
	    if min_tensors_per_layer <= 0 or max_tensors_per_layer < min_tensors_per_layer:
	        raise ValueError("invalid tensors_per_layer range")
	    # Without these guards a non-positive value surfaces as a bare
	    # ZeroDivisionError from the ratio computations below.
	    if target_layers_tensor <= 0:
	        raise ValueError("target_layers_tensor must be positive")
	    if max_physical_layers <= 0:
	        raise ValueError("max_physical_layers must be positive")

	    preferred_physical = 41
	    preferred_depth = 44
	    candidates = []
	    for tensors_per_layer in range(min_tensors_per_layer, max_tensors_per_layer + 1):
	        physical_layers = math.ceil(target_layers_tensor / tensors_per_layer)
	        allocated = physical_layers * tensors_per_layer
	        masked = allocated - target_layers_tensor
	        active_ratio = target_layers_tensor / allocated
	        physical_pressure = physical_layers / max_physical_layers
	        depth_distance = abs(tensors_per_layer - preferred_depth) / preferred_depth
	        physical_distance = abs(physical_layers - preferred_physical) / preferred_physical
	        mask_penalty = masked / target_layers_tensor
	        over_limit_penalty = max(0.0, physical_layers - max_physical_layers) / max_physical_layers
	        memory_pressure = (physical_layers * hidden_dim) / (max_physical_layers * 5120)
	        risk_score = (
	            0.50 * mask_penalty
	            + 0.18 * depth_distance
	            + 0.16 * physical_distance
	            + 0.10 * max(0.0, memory_pressure - 1.0)
	            + 0.06 * over_limit_penalty
	        )
	        stability_score = max(0.0, 1.0 - risk_score)
	        # Sharpness is discounted only when the physical budget is exceeded.
	        sharpness_score = max(
	            0.0,
	            active_ratio * stability_score * (1.0 - 0.05 * max(0.0, physical_pressure - 1.0)),
	        )
	        candidates.append(
	            {
	                "tensors_per_layer": tensors_per_layer,
	                "physical_layers": physical_layers,
	                "allocated_layers_tensor": allocated,
	                "active_layers_tensor": target_layers_tensor,
	                "masked_layers_tensor": masked,
	                "active_tensor_ratio": active_ratio,
	                "physical_pressure": physical_pressure,
	                "risk_score": risk_score,
	                "stability_score": stability_score,
	                "sharpness_score": sharpness_score,
	                "rtx_3090_planning_safe": physical_layers <= max_physical_layers and hidden_dim <= 5120,
	            }
	        )

	    candidates.sort(
	        key=lambda row: (
	            not row["rtx_3090_planning_safe"],
	            -row["sharpness_score"],
	            row["masked_layers_tensor"],
	            abs(row["tensors_per_layer"] - preferred_depth),
	            row["tensors_per_layer"],
	        )
	    )
	    return {
	        "target_layers_tensor": target_layers_tensor,
	        "search_range": {
	            "min_tensors_per_layer": min_tensors_per_layer,
	            "max_tensors_per_layer": max_tensors_per_layer,
	            "max_physical_layers": max_physical_layers,
	        },
	        "best": candidates[0],
	        "top_candidates": candidates[:10],
	    }


	def build_tensor_layer_plan(
	    out_dir: str | Path,
	    *,
	    target_layers_tensor: int = 1803,
	    physical_layers: int = 41,
	    hidden_dim: int = 5120,
	    local_window: int = 2048,
	) -> dict:
	    """Build a tensor-layer plan report and write it as JSON and Markdown.

	    With ``physical_layers == 0`` the physical layer count is chosen by
	    ``choose_stable_tensor_layer_plan`` over the range 32..48 and the report
	    is marked ``"auto_stable"``; otherwise the given count is used and the
	    report is marked ``"manual"``. The chosen count is then scored on its
	    own to obtain the risk and stability scores, and a tensors-per-layer
	    search over 24..64 is attached for reference.

	    Args:
	        out_dir: Directory for ``tensor_layer_plan.json`` and
	            ``tensor_layer_plan.md``; created if missing.
	        target_layers_tensor: Number of active virtual layers to represent.
	        physical_layers: Physical layer count, or 0 to select automatically.
	        hidden_dim: Hidden size recorded in the report and used for scoring.
	        local_window: Recorded in the report unchanged.

	    Returns:
	        The report dict, including ``json_path`` and ``markdown_path``.

	    Raises:
	        ValueError: If ``target_layers_tensor`` is not positive or
	            ``physical_layers`` is negative.
	    """
	    if target_layers_tensor <= 0:
	        raise ValueError("target_layers_tensor must be positive")
	    if physical_layers < 0:
	        raise ValueError("physical_layers must be non-negative")

	    planner_mode = "manual"
	    if physical_layers == 0:
	        planner_mode = "auto_stable"
	        selected = choose_stable_tensor_layer_plan(
	            target_layers_tensor=target_layers_tensor,
	            min_physical_layers=32,
	            max_physical_layers=48,
	            hidden_dim=hidden_dim,
	        )
	        physical_layers = int(selected["physical_layers"])

	    # Depth is rounded up; the surplus lanes are reported as masked.
	    tensor_depth_per_physical = math.ceil(target_layers_tensor / physical_layers)
	    allocated_layers_tensor = physical_layers * tensor_depth_per_physical
	    masked_layers_tensor = allocated_layers_tensor - target_layers_tensor
	    active_ratio = target_layers_tensor / allocated_layers_tensor

	    # A single-value search range scores exactly the selected layer count.
	    stability = choose_stable_tensor_layer_plan(
	        target_layers_tensor=target_layers_tensor,
	        min_physical_layers=physical_layers,
	        max_physical_layers=physical_layers,
	        hidden_dim=hidden_dim,
	    )
	    risk_score = float(stability["risk_score"])
	    stability_score = float(stability["stability_score"])
	    tensors_per_layer_search = optimize_tensors_per_layer(
	        target_layers_tensor=target_layers_tensor,
	        min_tensors_per_layer=24,
	        max_tensors_per_layer=64,
	        hidden_dim=hidden_dim,
	    )

	    report = {
	        "schema_version": "tinymind-tensor-layer-plan-v1",
	        "created_at": datetime.now(timezone.utc).isoformat(),
	        "target_layers_tensor": int(target_layers_tensor),
	        "planner_mode": planner_mode,
	        "active_layers_tensor": int(target_layers_tensor),
	        "allocated_layers_tensor": int(allocated_layers_tensor),
	        "masked_layers_tensor": int(masked_layers_tensor),
	        "physical_layers": int(physical_layers),
	        "tensor_depth_per_physical_layer": int(tensor_depth_per_physical),
	        "tensors_per_layer": int(tensor_depth_per_physical),
	        "active_tensor_ratio": active_ratio,
	        "risk_score": risk_score,
	        "stability_score": stability_score,
	        "hidden_dim": int(hidden_dim),
	        "local_window": int(local_window),
	        "execution_model": {
	            "kind": "virtual_tensor_depth_with_masked_micro_layers",
	            "description": (
	                "Use a normal physical transformer/PureField stack and attach tensorized "
	                "micro-depth lanes inside each block. Extra allocated lanes are masked so "
	                "the active Total Layers Tensor is exact."
	            ),
	        },
	        "tensors_per_layer_optimizer": tensors_per_layer_search,
	        "feasibility_gate": {
	            "rtx_3090_planning_safe": physical_layers <= 48 and hidden_dim <= 5120,
	            # Name the requested target rather than a fixed 1803 so the
	            # reason stays true for non-default targets.
	            "reason": (
	                "The plan keeps physical layer count bounded; "
	                f"{int(target_layers_tensor)} is represented as virtual tensor depth."
	            ),
	        },
	        "stability_gate": {
	            "passed": bool(
	                stability_score >= 0.95
	                and masked_layers_tensor <= max(1, target_layers_tensor // 256)
	            ),
	            "divisibility_passed": bool(masked_layers_tensor == 0),
	            "stability_score": stability_score,
	            "risk_score": risk_score,
	            "masked_layers_tensor": int(masked_layers_tensor),
	            "reason": "Stable plans minimize masked virtual lanes and keep physical/depth pressure bounded.",
	        },
	        "claim_gate": {
	            "total_layers_tensor_claim_allowed": True,
	            "physical_1803_layers_claim_allowed": False,
	            "world_best_depth_claim_allowed": False,
	            "reason": "This is an architecture/planning metric, not measured quality or physical layer depth.",
	        },
	    }

	    out_path = Path(out_dir)
	    out_path.mkdir(parents=True, exist_ok=True)
	    json_path = out_path / "tensor_layer_plan.json"
	    md_path = out_path / "tensor_layer_plan.md"
	    # Paths are recorded before serialising so the JSON file contains them.
	    report["json_path"] = str(json_path)
	    report["markdown_path"] = str(md_path)
	    json_path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
	    md_path.write_text(_markdown(report), encoding="utf-8")
	    return report


	def _markdown(report: dict) -> str:
	    """Render the headline fields of a plan report as a Markdown bullet list.

	    Args:
	        report: Plan report dict. Must contain ``target_layers_tensor``,
	            ``active_layers_tensor``, ``physical_layers``,
	            ``tensor_depth_per_physical_layer``, ``masked_layers_tensor``,
	            ``stability_score``, ``tensors_per_layer_optimizer["best"]``,
	            ``feasibility_gate`` and ``stability_gate``.

	    Returns:
	        The Markdown text, terminated by a single newline.

	    Raises:
	        KeyError: If a required key is missing from ``report``.
	    """
	    best = report["tensors_per_layer_optimizer"]["best"]
	    feasible = report["feasibility_gate"]["rtx_3090_planning_safe"]
	    gate_passed = report["stability_gate"]["passed"]
	    lines = [
	        "# TinyMind Tensor Layer Plan",
	        "",
	        f"- Total Layers Tensor target: {report['target_layers_tensor']}",
	        f"- Active Layers Tensor: {report['active_layers_tensor']}",
	        f"- Physical layers: {report['physical_layers']}",
	        f"- Tensor depth per physical layer: {report['tensor_depth_per_physical_layer']}",
	        f"- Best tensors per layer: {best['tensors_per_layer']}",
	        f"- Tensor sharpness score: {best['sharpness_score']:.4f}",
	        f"- Masked tensor layers: {report['masked_layers_tensor']}",
	        f"- Stability score: {report['stability_score']:.4f}",
	        f"- RTX 3090 planning safe: {feasible}",
	        f"- Stability gate: {gate_passed}",
	        # Always rendered as blocked; the report is not consulted here.
	        "- Physical 1803-layer claim: blocked",
	    ]
	    return "\n".join(lines) + "\n"

Xet Storage Details

Size: 10.6 kB
Xet hash: 8d099ac0c4fd05bd9c6755798c3adb2b62d83e8c3212355e04f4ec7e584ed84f

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.