import copy
from typing import Optional

import torch.nn as nn
# Registry of supported activation modules, keyed by lowercase name.
# "swish" and "silu" map to the same module type (nn.SiLU).
ACTIVATION_FUNCTIONS = {
    name: factory()
    for name, factory in [
        ("swish", nn.SiLU),
        ("silu", nn.SiLU),
        ("mish", nn.Mish),
        ("gelu", nn.GELU),
        ("relu", nn.ReLU),
    ]
}
def get_clone(module: nn.Module) -> nn.Module:
    """Return an independent deep copy of *module* (parameters included)."""
    cloned = copy.deepcopy(module)
    return cloned
def get_clones(module: nn.Module, N: int) -> nn.ModuleList:
    """Return an ``nn.ModuleList`` holding *N* independent deep copies of *module*."""
    return nn.ModuleList(copy.deepcopy(module) for _copy_idx in range(N))
def get_activation_fn(act_fn: Optional[str] = None) -> nn.Module:
    """Look up an activation module by name (case-insensitive).

    Args:
        act_fn: Name of the activation ("swish", "silu", "mish", "gelu",
            "relu"), or ``None`` for a pass-through ``nn.Identity``.

    Returns:
        The matching activation module.
        NOTE(review): this returns the shared instance stored in
        ``ACTIVATION_FUNCTIONS``, not a fresh copy — confirm callers do not
        rely on distinct instances.

    Raises:
        ValueError: If *act_fn* names no registered activation.
    """
    if act_fn is None:
        return nn.Identity()
    act_fn = act_fn.lower()
    # Registry values are never None, so None unambiguously means "missing".
    activation = ACTIVATION_FUNCTIONS.get(act_fn)
    if activation is None:
        raise ValueError(f"Unsupported activation function: {act_fn}")
    return activation
def zero_module(module: nn.Module) -> nn.Module:
    """Zero out every parameter of *module* in place and return it.

    Useful for initializing residual branches so they start as identity;
    returns the same module object to allow chaining at call sites.
    """
    for param in module.parameters():
        # nn.init.zeros_ mutates in place without tracking gradients.
        nn.init.zeros_(param)
    return module