WCNegentropy
/

BitTransformerLM

@@ -1,82 +0,0 @@
-import torch
-from typing import List
def compress_bits(bits: torch.Tensor) -> torch.Tensor:
    """Run-length encode a 1D tensor of bits.

    Runs longer than 255 are split into several consecutive pairs carrying
    the same value, so that every run length fits in a ``uint8``. The whole
    encoding is done with tensor operations, so the result lives on the same
    device as ``bits``.

    Args:
        bits: 1D tensor with values 0 or 1 (bool or uint8).

    Returns:
        1D uint8 tensor containing interleaved values and run lengths
        (``[value0, length0, value1, length1, ...]``). An empty input
        yields an empty uint8 tensor.

    Raises:
        ValueError: If ``bits`` is not 1-dimensional.
    """
    if bits.dim() != 1:
        raise ValueError("compress_bits expects a 1D tensor")
    b = bits.to(torch.uint8)
    total = b.numel()
    if total == 0:
        return b

    max_run = 255  # largest run length representable in a uint8

    # Positions at which the value differs from its predecessor, i.e. where
    # a new run begins (position 0 is added separately below).
    boundaries = torch.nonzero(b[1:] != b[:-1]).flatten() + 1
    starts = torch.cat([boundaries.new_zeros(1), boundaries])
    ends = torch.cat([boundaries, boundaries.new_full((1,), total)])
    run_values = b[starts]
    run_lengths = ends - starts

    # Each run is emitted as ceil(length / 255) pairs: all but the last pair
    # carry the maximum length, the last one carries the remainder (1..255).
    pairs_per_run = torch.div(
        run_lengths + (max_run - 1), max_run, rounding_mode="floor"
    )
    out_values = torch.repeat_interleave(run_values, pairs_per_run)
    out_counts = torch.full_like(out_values, max_run)
    last_pair = torch.cumsum(pairs_per_run, dim=0) - 1
    remainder = run_lengths - max_run * (pairs_per_run - 1)
    out_counts[last_pair] = remainder.to(torch.uint8)

    # Interleave as value, count, value, count, ...
    return torch.stack([out_values, out_counts], dim=1).flatten()
def decompress_bits(compressed: torch.Tensor) -> torch.Tensor:
    """Expand a run-length encoded bit tensor back into its bit sequence.

    Args:
        compressed: 1D tensor of interleaved ``value, run length`` pairs.
            It is converted to uint8 before decoding.

    Returns:
        1D uint8 tensor in which every value is repeated as many times as
        its run length says.

    Raises:
        ValueError: If ``compressed`` is not 1D or has an odd number of
            elements.
    """
    well_formed = compressed.dim() == 1 and compressed.numel() % 2 == 0
    if not well_formed:
        raise ValueError("compressed tensor must be 1D even-length")
    pairs = compressed.to(torch.uint8)
    # Even positions hold the values, odd positions hold the run lengths.
    run_values = pairs[::2]
    run_lengths = pairs[1::2].to(torch.long)
    return run_values.repeat_interleave(run_lengths)
def model_output_decompress(compressed_batch) -> torch.Tensor:
    """Decompress a batch of compressed bit sequences.

    Args:
        compressed_batch: Either a single 1D compressed tensor, or an
            iterable (for example a 2D tensor or a list) whose items are 1D
            compressed tensors accepted by ``decompress_bits``.

    Returns:
        Tensor stacking the decoded sequences along a new leading
        dimension. A single 1D input produces a batch of one.

    Raises:
        ValueError: If the batch contains no sequences, or if the sequences
            decode to different lengths (they could not be stacked).
    """
    if isinstance(compressed_batch, torch.Tensor) and compressed_batch.dim() == 1:
        sequences = [decompress_bits(compressed_batch)]
    else:
        sequences = [decompress_bits(row) for row in compressed_batch]
    # An empty batch used to be reported as a length mismatch; say what is
    # actually wrong instead.
    if not sequences:
        raise ValueError("compressed_batch contains no sequences")
    if len({seq.numel() for seq in sequences}) != 1:
        raise ValueError("Sequences decompress to different lengths")
    return torch.stack(sequences)
-import numpy as np
def pack_bits(bits: torch.Tensor) -> torch.Tensor:
    """Pack a 1D bit tensor into bytes, eight bits per uint8 value.

    The work is delegated to ``numpy.packbits`` on a CPU copy of the input
    converted to uint8.

    Args:
        bits: 1D tensor of bits.

    Returns:
        1D uint8 tensor holding the packed bytes.

    Raises:
        ValueError: If ``bits`` is not 1-dimensional.
    """
    if bits.dim() == 1:
        as_bytes = bits.to(torch.uint8).cpu().numpy()
        return torch.from_numpy(np.packbits(as_bytes))
    raise ValueError("pack_bits expects a 1D tensor")
-def unpack_bits(packed: torch.Tensor, *, n_bits: int | None = None) -> torch.Tensor:
-    """Unpack uint8 values back into a bit tensor."""
-    if packed.dim() != 1:
-        raise ValueError("unpack_bits expects a 1D tensor")
-    arr = np.unpackbits(packed.to(torch.uint8).cpu().numpy())
-    if n_bits is not None:
-        arr = arr[:n_bits]
-    return torch.from_numpy(arr)