Remove nested directory: BitTransformerLM/bit_transformer/collapse.py
Browse files
BitTransformerLM/bit_transformer/collapse.py
DELETED
|
@@ -1,95 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import os
|
| 3 |
-
from typing import Dict, List, Optional, Tuple
|
| 4 |
-
|
| 5 |
-
import torch
|
| 6 |
-
|
| 7 |
-
from .model import BitTransformerLM
|
| 8 |
-
from .training import train_loop
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
def collapse_submodel(
    cluster_data: List[List[int]],
    target_params: Dict,
    floors: Optional[Dict[str, float]] = None,
    max_rounds: int = 3,
    width_scale: float = 1.5,
    forward_kwargs: Optional[Dict] = None,
) -> Tuple[BitTransformerLM, Dict[str, float]]:
    """Distill a submodel from clustered bit sequences.

    The routine trains a fresh ``BitTransformerLM`` on the cluster data and
    checks its telemetry against ``floors``. When the floors are unmet it
    escalates capacity between rounds: deepen after the first failure, widen
    the hidden dimensions by ``width_scale`` once after the second, then keep
    deepening.

    Args:
        cluster_data: Bit sequences (lists of 0/1 ints); split 80/20 into
            train/validation tensors.
        target_params: Keyword arguments for ``BitTransformerLM``. Copied, so
            the caller's dict is never mutated.
        floors: Minimum acceptable telemetry values; defaults to
            ``{"negentropy": 0.5, "lz_complexity": 0.3, "symbiosis_score": 0.5}``.
        max_rounds: Maximum train/evaluate attempts. Must be at least 1.
        width_scale: Multiplier applied once to ``d_model`` and
            ``dim_feedforward`` when widening.
        forward_kwargs: Extra keyword arguments forwarded to the model call
            during both training and validation.

    Returns:
        The distilled model and its final telemetry metrics.

    Raises:
        ValueError: If ``max_rounds`` is less than 1 (previously this
            surfaced as an opaque ``UnboundLocalError`` at the return).
    """
    if max_rounds < 1:
        raise ValueError("max_rounds must be at least 1")
    if floors is None:
        floors = {"negentropy": 0.5, "lz_complexity": 0.3, "symbiosis_score": 0.5}

    bit_tensor = torch.tensor(cluster_data, dtype=torch.long)
    n = len(bit_tensor)
    split = max(1, int(0.8 * n))
    train_bits = bit_tensor[:split]
    val_bits = bit_tensor[split:]
    if len(val_bits) == 0:
        # Tiny clusters: validate on the training split rather than nothing.
        val_bits = train_bits

    params = target_params.copy()
    metrics: Dict[str, float] = {}
    width_scaled = False
    for round_idx in range(max_rounds):
        model = BitTransformerLM(**params)
        train_loop(
            model,
            train_bits,
            epochs=2,
            compress_prob=0.5,
            direct_prob=0.0,
            log=False,
            forward_kwargs=forward_kwargs,
        )
        # Evaluate in inference mode so stochastic layers (e.g. dropout)
        # cannot skew the telemetry that the floor check depends on.
        model.eval()
        with torch.no_grad():
            logits, telemetry = model(val_bits, **(forward_kwargs or {}))
            neg_k = model.negentropy_logits(logits).mean().item()
            lz_c = model.lz_complexity_logits(logits).mean().item()
            sym_s = telemetry["symbiosis_score"].mean().item()
        metrics = {
            "negentropy": neg_k,
            "lz_complexity": lz_c,
            "symbiosis_score": sym_s,
        }
        if (
            neg_k >= floors["negentropy"]
            and lz_c >= floors["lz_complexity"]
            and sym_s >= floors["symbiosis_score"]
        ):
            break
        # Escalation schedule: deepen first, widen exactly once, then keep
        # deepening on subsequent failed rounds.
        if round_idx == 0:
            params["num_layers"] = max(1, params.get("num_layers", 1)) + 1
        elif not width_scaled:
            params["d_model"] = int(params.get("d_model", 32) * width_scale)
            params["dim_feedforward"] = int(
                params.get("dim_feedforward", 64) * width_scale
            )
            width_scaled = True
        else:
            params["num_layers"] = max(1, params.get("num_layers", 1)) + 1
    return model, metrics
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
def save_distilled_model(
    model: BitTransformerLM,
    path: str,
    metrics: Dict[str, float],
    floors: Optional[Dict[str, float]] = None,
) -> None:
    """Persist a distilled model together with a metric summary.

    The model's ``state_dict`` is written to ``path``, and a
    ``metrics.json`` file is created in the same directory recording the
    achieved metrics alongside the target floors (an empty dict when no
    floors were supplied).
    """
    summary = {"metrics": metrics, "floors": floors or {}}
    summary_file = os.path.join(os.path.dirname(path), "metrics.json")
    torch.save(model.state_dict(), path)
    with open(summary_file, "w") as handle:
        json.dump(summary, handle)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|