Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /tensor_layer_planner.py
| """Tensor-layer planning for very deep virtual TinyMind stacks.""" | |
| from __future__ import annotations | |
| from datetime import datetime, timezone | |
| import json | |
| import math | |
| from pathlib import Path | |
def choose_stable_tensor_layer_plan(
    *,
    target_layers_tensor: int = 1803,
    min_physical_layers: int = 32,
    max_physical_layers: int = 48,
    hidden_dim: int = 5120,
) -> dict:
    """Return the lowest-risk physical layer count for a virtual tensor depth.

    Every physical layer count in ``[min_physical_layers, max_physical_layers]``
    is scored.  For each count the per-layer depth is the ceiling of
    ``target_layers_tensor / count``; lanes allocated beyond the target are
    reported as "masked".

    The risk score is a weighted sum of four terms:

    * 0.45 x fraction of allocated lanes that are masked,
    * 0.25 x relative distance from the preferred count of 41,
    * 0.20 x depth overshoot beyond 64 lanes per layer (relative),
    * 0.10 x memory overshoot relative to a 48-layer x 5120-wide reference.

    Candidates are ranked by: exact divisibility first, then lower risk, then
    closeness to the preferred count, then the smaller count.

    Args:
        target_layers_tensor: Number of active virtual layers; must be positive.
        min_physical_layers: Inclusive lower bound of the search; must be positive.
        max_physical_layers: Inclusive upper bound; must be >= the lower bound.
        hidden_dim: Model width, used only for the memory-pressure term.

    Returns:
        The winning candidate as a dict with the keys ``physical_layers``,
        ``tensor_depth_per_physical_layer``, ``allocated_layers_tensor``,
        ``active_layers_tensor``, ``masked_layers_tensor``,
        ``active_tensor_ratio``, ``divisibility_passed``, ``risk_score`` and
        ``stability_score``.

    Raises:
        ValueError: If the target is not positive or the range is invalid.
    """
    if target_layers_tensor <= 0:
        raise ValueError("target_layers_tensor must be positive")
    if min_physical_layers <= 0 or max_physical_layers < min_physical_layers:
        raise ValueError("invalid physical layer search range")

    anchor = 41  # preferred physical layer count

    def score(count: int) -> dict:
        # Describe and score a single physical layer count.
        lanes = math.ceil(target_layers_tensor / count)
        total = count * lanes
        spare = total - target_layers_tensor
        fill = target_layers_tensor / total
        offset = abs(count - anchor) / max(anchor, 1)
        depth_load = lanes / 64.0
        memory_load = (count * hidden_dim) / (48 * 5120)
        # Only the overshoot above 1.0 of the depth/memory loads adds risk.
        risk = (
            0.45 * (1.0 - fill)
            + 0.25 * offset
            + 0.20 * max(0.0, depth_load - 1.0)
            + 0.10 * max(0.0, memory_load - 1.0)
        )
        return {
            "physical_layers": count,
            "tensor_depth_per_physical_layer": lanes,
            "allocated_layers_tensor": total,
            "active_layers_tensor": target_layers_tensor,
            "masked_layers_tensor": spare,
            "active_tensor_ratio": fill,
            "divisibility_passed": spare == 0,
            "risk_score": risk,
            "stability_score": max(0.0, 1.0 - risk),
        }

    def rank(row: dict) -> tuple:
        # False sorts before True, so exactly divisible plans come first.
        return (
            row["masked_layers_tensor"] != 0,
            row["risk_score"],
            abs(row["physical_layers"] - anchor),
            row["physical_layers"],
        )

    rows = [
        score(count)
        for count in range(min_physical_layers, max_physical_layers + 1)
    ]
    return min(rows, key=rank)
def optimize_tensors_per_layer(
    *,
    target_layers_tensor: int = 1803,
    min_tensors_per_layer: int = 24,
    max_tensors_per_layer: int = 64,
    hidden_dim: int = 5120,
    max_physical_layers: int = 48,
) -> dict:
    """Search for the tensors-per-layer value that gives the sharpest plan.

    Every depth in ``[min_tensors_per_layer, max_tensors_per_layer]`` is tried.
    For each one the physical layer count is the ceiling of
    ``target_layers_tensor / depth``; lanes allocated beyond the target are
    counted as masked.

    The risk score is a weighted sum of:

    * 0.50 x masked lanes relative to the target,
    * 0.18 x relative distance from the preferred depth of 44,
    * 0.16 x relative distance from the preferred physical count of 41,
    * 0.10 x memory overshoot relative to ``max_physical_layers`` x 5120,
    * 0.06 x physical layer overshoot beyond ``max_physical_layers``.

    ``sharpness_score`` is the active ratio times the stability score, reduced
    by 5% of any physical-pressure overshoot above 1.0.

    Candidates are ranked by: planning-safe first, then higher sharpness, then
    fewer masked lanes, then closeness to the preferred depth, then the
    smaller depth.

    Args:
        target_layers_tensor: Number of active virtual layers; must be positive.
        min_tensors_per_layer: Inclusive lower bound of the search; must be positive.
        max_tensors_per_layer: Inclusive upper bound; must be >= the lower bound.
        hidden_dim: Model width, used for memory pressure and the safety flag.
        max_physical_layers: Physical layer cap for the safety flag and the
            pressure terms; must be positive.

    Returns:
        A dict with ``target_layers_tensor``, the ``search_range`` that was
        used, the ``best`` candidate and up to ten ``top_candidates``.

    Raises:
        ValueError: If the target, the depth range or ``max_physical_layers``
            is invalid.
    """
    # Reject inputs that would otherwise surface as ZeroDivisionError below
    # (zero allocation for a non-positive target, or division by a zero cap).
    if target_layers_tensor <= 0:
        raise ValueError("target_layers_tensor must be positive")
    if min_tensors_per_layer <= 0 or max_tensors_per_layer < min_tensors_per_layer:
        raise ValueError("invalid tensors_per_layer range")
    if max_physical_layers <= 0:
        raise ValueError("max_physical_layers must be positive")

    candidates = []
    preferred_physical = 41
    preferred_depth = 44
    for tensors_per_layer in range(min_tensors_per_layer, max_tensors_per_layer + 1):
        physical_layers = math.ceil(target_layers_tensor / tensors_per_layer)
        allocated = physical_layers * tensors_per_layer
        masked = allocated - target_layers_tensor
        active_ratio = target_layers_tensor / allocated
        physical_pressure = physical_layers / max_physical_layers
        depth_distance = abs(tensors_per_layer - preferred_depth) / preferred_depth
        physical_distance = abs(physical_layers - preferred_physical) / preferred_physical
        mask_penalty = masked / target_layers_tensor
        over_limit_penalty = max(0.0, physical_layers - max_physical_layers) / max_physical_layers
        memory_pressure = (physical_layers * hidden_dim) / (max_physical_layers * 5120)
        risk_score = (
            0.50 * mask_penalty
            + 0.18 * depth_distance
            + 0.16 * physical_distance
            + 0.10 * max(0.0, memory_pressure - 1.0)
            + 0.06 * over_limit_penalty
        )
        stability_score = max(0.0, 1.0 - risk_score)
        sharpness_score = max(
            0.0,
            active_ratio * stability_score * (1.0 - 0.05 * max(0.0, physical_pressure - 1.0)),
        )
        candidates.append(
            {
                "tensors_per_layer": tensors_per_layer,
                "physical_layers": physical_layers,
                "allocated_layers_tensor": allocated,
                "active_layers_tensor": target_layers_tensor,
                "masked_layers_tensor": masked,
                "active_tensor_ratio": active_ratio,
                "physical_pressure": physical_pressure,
                "risk_score": risk_score,
                "stability_score": stability_score,
                "sharpness_score": sharpness_score,
                "rtx_3090_planning_safe": physical_layers <= max_physical_layers and hidden_dim <= 5120,
            }
        )

    # False sorts before True, so planning-safe candidates lead the ranking.
    candidates.sort(
        key=lambda row: (
            not row["rtx_3090_planning_safe"],
            -row["sharpness_score"],
            row["masked_layers_tensor"],
            abs(row["tensors_per_layer"] - preferred_depth),
            row["tensors_per_layer"],
        )
    )
    return {
        "target_layers_tensor": target_layers_tensor,
        "search_range": {
            "min_tensors_per_layer": min_tensors_per_layer,
            "max_tensors_per_layer": max_tensors_per_layer,
            "max_physical_layers": max_physical_layers,
        },
        "best": candidates[0],
        "top_candidates": candidates[:10],
    }
def build_tensor_layer_plan(
    out_dir: str | Path,
    *,
    target_layers_tensor: int = 1803,
    physical_layers: int = 41,
    hidden_dim: int = 5120,
    local_window: int = 2048,
) -> dict:
    """Build a tensor-layer plan report and write it as JSON and Markdown.

    The target number of virtual layers is spread over ``physical_layers``
    blocks using a per-block depth equal to the ceiling of
    ``target_layers_tensor / physical_layers``; lanes allocated beyond the
    target are reported as masked.

    Args:
        out_dir: Directory that receives ``tensor_layer_plan.json`` and
            ``tensor_layer_plan.md``; created (with parents) if missing.
        target_layers_tensor: Number of active virtual layers; must be positive.
        physical_layers: Physical block count.  ``0`` switches to
            ``"auto_stable"`` mode, where the count is chosen by
            ``choose_stable_tensor_layer_plan`` from the range 32..48.
            Any positive value is used as given (``"manual"`` mode).
        hidden_dim: Model width recorded in the report and used for scoring
            and the feasibility gate.
        local_window: Recorded in the report as-is.

    Returns:
        The report dict, including the ``json_path`` and ``markdown_path`` of
        the files that were written.

    Raises:
        ValueError: If ``target_layers_tensor`` is not positive or
            ``physical_layers`` is negative.
    """
    if target_layers_tensor <= 0:
        raise ValueError("target_layers_tensor must be positive")
    if physical_layers < 0:
        raise ValueError("physical_layers must be non-negative")

    if physical_layers == 0:
        planner_mode = "auto_stable"
        # The chosen candidate already carries the risk/stability scores for
        # its physical layer count (a candidate's scores do not depend on the
        # search range), so it is reused instead of being scored again.
        stability = choose_stable_tensor_layer_plan(
            target_layers_tensor=target_layers_tensor,
            min_physical_layers=32,
            max_physical_layers=48,
            hidden_dim=hidden_dim,
        )
        physical_layers = int(stability["physical_layers"])
    else:
        planner_mode = "manual"
        # A single-value range scores exactly the caller's layer count.
        stability = choose_stable_tensor_layer_plan(
            target_layers_tensor=target_layers_tensor,
            min_physical_layers=physical_layers,
            max_physical_layers=physical_layers,
            hidden_dim=hidden_dim,
        )

    tensor_depth_per_physical = math.ceil(target_layers_tensor / physical_layers)
    allocated_layers_tensor = physical_layers * tensor_depth_per_physical
    masked_layers_tensor = allocated_layers_tensor - target_layers_tensor
    active_ratio = target_layers_tensor / allocated_layers_tensor

    tensors_per_layer_search = optimize_tensors_per_layer(
        target_layers_tensor=target_layers_tensor,
        min_tensors_per_layer=24,
        max_tensors_per_layer=64,
        hidden_dim=hidden_dim,
    )

    report = {
        "schema_version": "tinymind-tensor-layer-plan-v1",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "target_layers_tensor": int(target_layers_tensor),
        "planner_mode": planner_mode,
        "active_layers_tensor": int(target_layers_tensor),
        "allocated_layers_tensor": int(allocated_layers_tensor),
        "masked_layers_tensor": int(masked_layers_tensor),
        "physical_layers": int(physical_layers),
        "tensor_depth_per_physical_layer": int(tensor_depth_per_physical),
        "tensors_per_layer": int(tensor_depth_per_physical),
        "active_tensor_ratio": active_ratio,
        "risk_score": float(stability["risk_score"]),
        "stability_score": float(stability["stability_score"]),
        "hidden_dim": int(hidden_dim),
        "local_window": int(local_window),
        "execution_model": {
            "kind": "virtual_tensor_depth_with_masked_micro_layers",
            "description": (
                "Use a normal physical transformer/PureField stack and attach tensorized "
                "micro-depth lanes inside each block. Extra allocated lanes are masked so "
                "the active Total Layers Tensor is exact."
            ),
        },
        "tensors_per_layer_optimizer": tensors_per_layer_search,
        "feasibility_gate": {
            "rtx_3090_planning_safe": physical_layers <= 48 and hidden_dim <= 5120,
            # Report the target that was actually planned rather than the
            # default; identical text when the target is 1803.
            "reason": (
                "The plan keeps physical layer count bounded; "
                f"{int(target_layers_tensor)} is represented as virtual tensor depth."
            ),
        },
        "stability_gate": {
            # Passes only with a high stability score and at most
            # max(1, target // 256) masked lanes.
            "passed": bool(
                stability["stability_score"] >= 0.95
                and masked_layers_tensor <= max(1, target_layers_tensor // 256)
            ),
            "divisibility_passed": bool(masked_layers_tensor == 0),
            "stability_score": float(stability["stability_score"]),
            "risk_score": float(stability["risk_score"]),
            "masked_layers_tensor": int(masked_layers_tensor),
            "reason": "Stable plans minimize masked virtual lanes and keep physical/depth pressure bounded.",
        },
        "claim_gate": {
            "total_layers_tensor_claim_allowed": True,
            "physical_1803_layers_claim_allowed": False,
            "world_best_depth_claim_allowed": False,
            "reason": "This is an architecture/planning metric, not measured quality or physical layer depth.",
        },
    }

    out_path = Path(out_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    json_path = out_path / "tensor_layer_plan.json"
    md_path = out_path / "tensor_layer_plan.md"
    # The paths are added before serialising so the JSON file records them too.
    report["json_path"] = str(json_path)
    report["markdown_path"] = str(md_path)
    json_path.write_text(
        json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True),
        encoding="utf-8",
    )
    md_path.write_text(_markdown(report), encoding="utf-8")
    return report
def _markdown(report: dict) -> str:
    """Render a plan report as a short Markdown bullet summary.

    Args:
        report: Plan report dict.  The keys read are ``target_layers_tensor``,
            ``active_layers_tensor``, ``physical_layers``,
            ``tensor_depth_per_physical_layer``, ``masked_layers_tensor``,
            ``stability_score``, ``tensors_per_layer_optimizer["best"]``
            (``tensors_per_layer`` and ``sharpness_score``),
            ``feasibility_gate["rtx_3090_planning_safe"]`` and
            ``stability_gate["passed"]``.

    Returns:
        The Markdown text, terminated by a single newline.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    best = report["tensors_per_layer_optimizer"]["best"]
    lines = [
        "# TinyMind Tensor Layer Plan",
        "",
        f"- Total Layers Tensor target: {report['target_layers_tensor']}",
        f"- Active Layers Tensor: {report['active_layers_tensor']}",
        f"- Physical layers: {report['physical_layers']}",
        f"- Tensor depth per physical layer: {report['tensor_depth_per_physical_layer']}",
        f"- Best tensors per layer: {best['tensors_per_layer']}",
        f"- Tensor sharpness score: {best['sharpness_score']:.4f}",
        f"- Masked tensor layers: {report['masked_layers_tensor']}",
        f"- Stability score: {report['stability_score']:.4f}",
        f"- RTX 3090 planning safe: {report['feasibility_gate']['rtx_3090_planning_safe']}",
        f"- Stability gate: {report['stability_gate']['passed']}",
        # Name the target that was actually planned; for the default target
        # this reads "Physical 1803-layer claim: blocked" as before.
        f"- Physical {report['target_layers_tensor']}-layer claim: blocked",
    ]
    return "\n".join(lines) + "\n"
Xet Storage Details
- Size:
- 10.6 kB
- Xet hash:
- 8d099ac0c4fd05bd9c6755798c3adb2b62d83e8c3212355e04f4ec7e584ed84f
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.