Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff/bundle/evaluation/native_virtual_width.py
| from __future__ import annotations | |
| from datetime import datetime, timezone | |
| import json | |
| import math | |
| from pathlib import Path | |
| from typing import Any | |
| import torch | |
| from torch import nn | |
class FactorizedVirtualWidthBridge(nn.Module):
    """Residual low-rank bridge operating entirely at ``physical_dim`` width.

    The input is projected down to ``rank`` features, gated, squashed with
    ``tanh``, projected back up to ``physical_dim`` and added to the input.
    ``virtual_dim`` is only recorded as metadata on the module; the forward
    pass never builds a tensor of that width.
    """

    def __init__(self, physical_dim: int, virtual_dim: int, rank: int, lanes: int = 64):
        super().__init__()
        # Coerce the size arguments to plain ints before using them as shapes.
        self.physical_dim, self.virtual_dim = int(physical_dim), int(virtual_dim)
        self.rank, self.lanes = int(rank), int(lanes)

        # ``down`` is built before ``up`` so that seeded initialisation draws
        # random numbers in a fixed order.
        self.down = nn.Linear(self.physical_dim, self.rank, bias=False)
        # Both gates start at zero, i.e. sigmoid(gate) == 0.5 at initialisation.
        self.rank_gate = nn.Parameter(torch.zeros(self.rank))
        self.lane_gate = nn.Parameter(torch.zeros(self.lanes, self.rank))
        self.up = nn.Linear(self.rank, self.physical_dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` plus the gated low-rank update computed from ``x``."""
        # The per-lane gates are collapsed to one bias per rank coordinate.
        gate = torch.sigmoid(self.rank_gate + self.lane_gate.mean(dim=0))
        hidden = torch.tanh(self.down(x) * gate)
        return x + self.up(hidden)

    def parameter_count(self) -> int:
        """Return the total number of scalar parameters held by the module."""
        total = 0
        for tensor in self.parameters():
            total += tensor.numel()
        return total
| def _dense_virtual_layer_params(virtual_dim: int) -> int: | |
| # Approximate transformer/PureField dense layer budget: projections + FFN. | |
| return 8 * int(virtual_dim) * int(virtual_dim) | |
| def _factorized_params(physical_dim: int, rank: int, lanes: int) -> int: | |
| return 2 * int(physical_dim) * int(rank) + int(rank) + int(lanes) * int(rank) | |
def _smoke_candidate(physical_dim: int, virtual_dim: int, rank: int, lanes: int) -> dict[str, Any]:
    """Run one seeded forward/backward pass through a freshly built bridge.

    Args:
        physical_dim: Real activation width of the bridge and of the test input.
        virtual_dim: Symbolic width, passed through to the bridge constructor.
        rank: Bottleneck width of the bridge.
        lanes: Number of lane-gate rows in the bridge.

    Returns:
        A JSON-serializable dict with ``forward_finite``, ``backward_finite``,
        ``loss``, ``output_shape`` and ``bridge_params`` (an ``int``).
    """
    # NOTE: this reseeds the process-wide torch RNG so that repeated runs with
    # the same arguments draw the same weights and inputs.
    torch.manual_seed(20260527)
    bridge = FactorizedVirtualWidthBridge(physical_dim, virtual_dim, rank, lanes)
    x = torch.randn(2, 8, physical_dim, requires_grad=True)
    y = bridge(x)
    loss = y.float().pow(2).mean()
    loss.backward()

    # Only parameters that actually received a gradient are inspected; an
    # empty list counts as a failure rather than a vacuous success.
    grad_values = [param.grad for param in bridge.parameters() if param.grad is not None]
    grad_finite = bool(grad_values) and all(torch.isfinite(grad).all().item() for grad in grad_values)
    return {
        "forward_finite": bool(torch.isfinite(y).all().item()),
        "backward_finite": grad_finite and bool(torch.isfinite(x.grad).all().item()),
        "loss": float(loss.detach().cpu()),
        "output_shape": list(y.shape),
        # Call the method: storing the bound method itself would put a
        # non-serializable object into the report and break ``json.dumps``.
        "bridge_params": bridge.parameter_count(),
    }
def _candidate(virtual_dim: int, physical_dim: int, layers: int, rank: int, lanes: int) -> dict[str, Any]:
    """Describe and score one (physical_dim, layers, rank) configuration.

    Compares the estimated dense parameter budget at ``virtual_dim`` with the
    factorized bridge budget, runs the smoke test, and returns everything in a
    single dict together with a log-based ``score``.
    """
    depth = int(layers)
    dense_total = _dense_virtual_layer_params(virtual_dim) * depth
    factor_total = _factorized_params(physical_dim, rank, lanes) * depth
    # Guard against a zero-parameter bridge so the ratio stays defined.
    compression = dense_total / max(1, factor_total)
    smoke = _smoke_candidate(physical_dim, virtual_dim, rank, lanes)

    # The score is the sum of log1p over compression, width, depth and rank.
    score = sum(math.log1p(term) for term in (compression, physical_dim, layers, rank))

    result: dict[str, Any] = {
        "virtual_dim": virtual_dim,
        "physical_dim": physical_dim,
        "layers": layers,
        "rank": rank,
        "lanes": lanes,
        "materializes_virtual_activations": False,
    }
    result["dense_virtual_params_estimate"] = dense_total
    result["factorized_bridge_params_estimate"] = factor_total
    result["compression_vs_dense_virtual"] = compression
    result["smoke"] = smoke
    result["score"] = score
    return result
def build_native_virtual_width_report(
    out_dir: str | Path,
    *,
    virtual_dim: int = 20_480,
    physical_dims: list[int] | None = None,
    layers: list[int] | None = None,
    ranks: list[int] | None = None,
    lanes: int = 64,
) -> dict[str, Any]:
    """Evaluate a grid of bridge configurations and write a JSON report.

    Every combination of ``physical_dims`` x ``layers`` x ``ranks`` is scored;
    the report is written to ``<out_dir>/native_virtual_width_report.json``
    (the directory is created if needed) and also returned.

    Args:
        out_dir: Directory that receives the JSON report.
        virtual_dim: Symbolic width being budgeted; must be positive.
        physical_dims: Physical widths to try; ``None`` selects ``[512, 768, 1024]``.
        layers: Layer counts to try; ``None`` selects ``[6, 12, 24]``.
        ranks: Ranks to try; ``None`` selects ``[64, 96, 128, 192]``.
        lanes: Lane count passed to every candidate.

    Returns:
        The report dict, including ``json_path`` pointing at the written file.

    Raises:
        ValueError: If ``virtual_dim`` is not positive, or if any of
            ``physical_dims``, ``layers`` or ``ranks`` is given but empty.
    """
    # Only ``None`` selects the defaults. An explicitly empty list is a caller
    # error and must reach the emptiness check below instead of being silently
    # replaced by the defaults.
    physical_values = [512, 768, 1024] if physical_dims is None else physical_dims
    layer_values = [6, 12, 24] if layers is None else layers
    rank_values = [64, 96, 128, 192] if ranks is None else ranks
    if virtual_dim <= 0:
        raise ValueError("virtual_dim must be positive")
    if not physical_values or not layer_values or not rank_values:
        raise ValueError("physical_dims, layers, and ranks must not be empty")

    candidates = [
        _candidate(virtual_dim, physical_dim, layer_count, rank, lanes)
        for physical_dim in physical_values
        for layer_count in layer_values
        for rank in rank_values
    ]
    # One stable descending sort serves both the ranking and the winner: with
    # ``reverse=True`` ties keep their original order, so ``ranked[0]`` is the
    # same element ``max`` would return.
    ranked = sorted(candidates, key=lambda item: item["score"], reverse=True)
    best = ranked[0]
    smoke_ok = best["smoke"]["forward_finite"] and best["smoke"]["backward_finite"]

    report = {
        "schema": "tinymind.native_virtual_width.v1",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "target": {
            "virtual_dim": virtual_dim,
            "method": "factorized_low_rank_virtual_width",
            "materializes_virtual_activations": False,
        },
        "summary": {
            "candidate_count": len(candidates),
            "physical_dims": physical_values,
            "layers": layer_values,
            "ranks": rank_values,
            "lanes": lanes,
        },
        "best_candidate": best,
        "top_candidates": ranked[:8],
        "claim_gate": {
            "virtual_20480_candidate_ready": virtual_dim >= 20_480 and smoke_ok,
            "dense_20480_claim_allowed": False,
            "tier0_claim_allowed": False,
            "world_best_claim_allowed": False,
            "reason": "This proves a small factorized virtual-width bridge, not a dense 20480-wide trained frontier model.",
        },
    }

    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    path = out / "native_virtual_width_report.json"
    # Record the destination inside the report before serializing it.
    report["json_path"] = str(path)
    path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    return report
Xet Storage Details
- Size: 5.93 kB
- Xet hash: 60992a8bc61e0aea12aefd5156e30d0261f541356be84f8ec7404797b9eaebc2
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.