Remove nested directory: BitTransformerLM/bit_transformer/distributed.py
BitTransformerLM/bit_transformer/distributed.py
DELETED
@@ -1,30 +0,0 @@
-import torch
-import torch.nn as nn
-from typing import List, Optional
-
-from torch.distributed.fsdp import FullyShardedDataParallel
-try:
-    from torch.distributed.pipeline.sync import Pipe
-except Exception:  # pragma: no cover - Pipe may not be available in CPU builds
-    Pipe = None
-
-from .model import BitTransformerLM
-
-
-def wrap_fsdp(model: BitTransformerLM, **kwargs) -> FullyShardedDataParallel:
-    """Return a ``FullyShardedDataParallel`` wrapped model on the given device."""
-    device = kwargs.pop("device_id", torch.device("cpu"))
-    model = model.to(device)
-    return FullyShardedDataParallel(model, device_id=device, **kwargs)
-
-
-def make_pipeline(model: BitTransformerLM, chunks: int = 1) -> Pipe:
-    """Wrap the model with ``Pipe`` for simple pipeline parallelism.
-
-    The entire model is placed in an ``nn.Sequential`` so all existing telemetry
-    remains available. ``chunks`` controls microbatch splitting.
-    """
-    if Pipe is None:
-        raise RuntimeError("Pipeline parallelism not available in this build")
-    seq = nn.Sequential(model)
-    return Pipe(seq, chunks=chunks)
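
For reference, the removed module exported two helpers, `wrap_fsdp` and `make_pipeline`, so any code that still imports `bit_transformer.distributed` will now fail at import time. Below is a minimal, hypothetical compatibility guard for such callers, mirroring the optional-import pattern the file itself used for `Pipe`; the wrapper name `maybe_wrap_fsdp` and the fall-back-to-unwrapped-model behaviour are illustrative and not part of the repository.

```python
# Hypothetical guard for code that previously imported the removed helpers.
# If bit_transformer.distributed no longer exists, fall back to running the
# model unwrapped instead of raising at import time.
try:
    from bit_transformer.distributed import wrap_fsdp, make_pipeline
except ImportError:  # module deleted in this commit
    wrap_fsdp = None
    make_pipeline = None


def maybe_wrap_fsdp(model, **kwargs):
    """Apply FSDP wrapping only when the removed helper is still importable."""
    if wrap_fsdp is None:
        return model
    return wrap_fsdp(model, **kwargs)
```

The same pattern applies to `make_pipeline`: check that the name is not `None` before calling it and fall back to the plain model when pipeline parallelism is unavailable.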