import copy
from typing import Optional

import torch.nn as nn

# Map names to activation classes (not instances) so each lookup can return a
# fresh module rather than a single object shared across every call site.
ACTIVATION_FUNCTIONS = {
    "swish": nn.SiLU,
    "silu": nn.SiLU,
    "mish": nn.Mish,
    "gelu": nn.GELU,
    "relu": nn.ReLU,
}


def get_clone(module: nn.Module) -> nn.Module:
    """Return one deep copy of ``module`` with its own independent parameters."""
    return copy.deepcopy(module)


def get_clones(module: nn.Module, N: int) -> nn.ModuleList:
    """Return ``N`` independent deep copies of ``module`` as an ``nn.ModuleList``."""
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])


def get_activation_fn(act_fn: Optional[str] = None) -> nn.Module:
    """Return a new activation module for ``act_fn``; ``None`` yields ``nn.Identity()``."""
    if act_fn is None:
        return nn.Identity()
    act_fn = act_fn.lower()
    if act_fn in ACTIVATION_FUNCTIONS:
        # Instantiate on lookup so each caller owns a distinct module.
        return ACTIVATION_FUNCTIONS[act_fn]()
    raise ValueError(f"Unsupported activation function: {act_fn}")


def zero_module(module: nn.Module) -> nn.Module:
    """Zero all parameters of ``module`` in place and return it."""
    for p in module.parameters():
        nn.init.zeros_(p)
    return module
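

# Minimal usage sketch (an illustrative smoke test, not part of the original
# API): exercises each helper; the names and sizes below are arbitrary assumptions.
if __name__ == "__main__":
    import torch

    block = nn.Sequential(nn.Linear(8, 8), get_activation_fn("gelu"))
    stack = get_clones(block, 3)   # three independent copies of the block
    zero_module(stack[0])          # zero the first copy's parameters in place

    x = torch.randn(2, 8)
    # The zeroed copy's Linear layer now maps everything to zero, and GELU(0) == 0.
    assert torch.equal(stack[0](x), torch.zeros(2, 8))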