| import json | |
| import os | |
| from typing import Dict, List, Optional, Tuple | |
| import torch | |
| from .model import BitTransformerLM | |
| from .training import train_loop | |
def collapse_submodel(
    cluster_data: List[List[int]],
    target_params: Dict,
    floors: Optional[Dict[str, float]] = None,
    max_rounds: int = 3,
    width_scale: float = 1.5,
    forward_kwargs: Optional[Dict] = None,
) -> Tuple[BitTransformerLM, Dict[str, float]]:
    """Distill a submodel from clustered bit sequences.

    The routine deepens the target model when telemetry floors are unmet
    and, after the first deepening fails, widens the hidden dimensions by
    ``width_scale`` once before retrying.

    Args:
        cluster_data: Bit sequences, one list of 0/1 ints per sample.
        target_params: Constructor kwargs for :class:`BitTransformerLM`.
            The dict is copied; the caller's dict is never mutated.
        floors: Minimum telemetry values to accept. Defaults to
            ``{"negentropy": 0.5, "lz_complexity": 0.3, "symbiosis_score": 0.5}``.
        max_rounds: Maximum train/evaluate attempts; must be >= 1.
        width_scale: Multiplier applied once to ``d_model`` and
            ``dim_feedforward`` after the first deepening fails.
        forward_kwargs: Extra keyword arguments forwarded to the model call.

    Returns:
        Tuple of the distilled model and its final telemetry metrics.

    Raises:
        ValueError: If ``cluster_data`` is empty or ``max_rounds`` < 1.
    """
    if not cluster_data:
        raise ValueError("cluster_data must contain at least one sequence")
    if max_rounds < 1:
        # Guard: with zero rounds the loop never runs and ``model`` below
        # would be unbound at the return statement.
        raise ValueError("max_rounds must be >= 1")
    if floors is None:
        floors = {"negentropy": 0.5, "lz_complexity": 0.3, "symbiosis_score": 0.5}

    bit_tensor = torch.tensor(cluster_data, dtype=torch.long)
    n = len(bit_tensor)
    # 80/20 train/validation split; when the cluster is too small to hold
    # out a validation slice, validate on the training data itself.
    split = max(1, int(0.8 * n))
    train_bits = bit_tensor[:split]
    val_bits = bit_tensor[split:]
    if len(val_bits) == 0:
        val_bits = train_bits

    params = target_params.copy()
    metrics: Dict[str, float] = {}
    width_scaled = False
    for round_idx in range(max_rounds):
        model = BitTransformerLM(**params)
        train_loop(
            model,
            train_bits,
            epochs=2,
            compress_prob=0.5,
            direct_prob=0.0,
            log=False,
            forward_kwargs=forward_kwargs,
        )
        # Evaluate in inference mode so dropout-style layers (if any) do
        # not distort the telemetry readings.
        model.eval()
        with torch.no_grad():
            logits, telemetry = model(val_bits, **(forward_kwargs or {}))
            neg_k = model.negentropy_logits(logits).mean().item()
            lz_c = model.lz_complexity_logits(logits).mean().item()
            sym_s = telemetry["symbiosis_score"].mean().item()
        metrics = {
            "negentropy": neg_k,
            "lz_complexity": lz_c,
            "symbiosis_score": sym_s,
        }
        if (
            neg_k >= floors["negentropy"]
            and lz_c >= floors["lz_complexity"]
            and sym_s >= floors["symbiosis_score"]
        ):
            break
        if round_idx == max_rounds - 1:
            # Final attempt failed; skip the pointless parameter rescale.
            break
        if round_idx == 0:
            # First failure: deepen the model by one layer.
            params["num_layers"] = max(1, params.get("num_layers", 1)) + 1
        elif not width_scaled:
            # Second failure: widen the hidden dimensions exactly once.
            params["d_model"] = int(params.get("d_model", 32) * width_scale)
            params["dim_feedforward"] = int(
                params.get("dim_feedforward", 64) * width_scale
            )
            width_scaled = True
        else:
            # Later failures: fall back to deepening again.
            params["num_layers"] = max(1, params.get("num_layers", 1)) + 1
    return model, metrics
def save_distilled_model(
    model: BitTransformerLM,
    path: str,
    metrics: Dict[str, float],
    floors: Optional[Dict[str, float]] = None,
) -> None:
    """Serialize a distilled model and its metric summary to disk.

    The model weights are written to ``path``. A ``metrics.json`` file is
    placed in the same directory, recording the achieved metrics alongside
    the target floors (an empty mapping when no floors were given).
    """
    torch.save(model.state_dict(), path)

    summary = {
        "metrics": metrics,
        "floors": floors or {},
    }
    out_dir = os.path.dirname(path)
    metrics_file = os.path.join(out_dir, "metrics.json")
    with open(metrics_file, "w") as f:
        json.dump(summary, f)