# SSLAM_pretrain / configuration_eat.py
# Provenance: uploaded by ta012 ("Upload 7 files", commit 501ebee, verified)
# configuration_eat.py
from transformers import PretrainedConfig
class EATConfig(PretrainedConfig):
    """Configuration for the EAT (Efficient Audio Transformer) model.

    Stores the hyperparameters needed to build either the pretraining or
    finetuning variant of the model. All arguments are optional and default
    to the standard ViT-Base-style audio setup (768-dim, 12 layers,
    12 heads, 16x16 patches over a 1024x128 log-mel spectrogram).

    Args:
        embed_dim: Transformer hidden size.
        depth: Number of transformer encoder layers.
        num_heads: Number of attention heads per layer.
        patch_size: Side length of each square spectrogram patch.
        stride: Patch-embedding stride (equal to patch_size => no overlap).
        in_chans: Input channels (1 for a mono spectrogram).
        mel_bins: Number of mel-frequency bins.
        max_length: Maximum sequence length in frames.
        num_classes: Output classes for the classification head
            (527 matches AudioSet).
        model_variant: Either "pretrain" or "finetune".
        mlp_ratio: Feed-forward hidden size as a multiple of embed_dim.
        qkv_bias: Whether the QKV projections use bias terms.
        drop_rate: General dropout probability.
        attn_drop_rate: Dropout on attention weights.
        activation_dropout: Dropout after the FFN activation.
        post_mlp_drop: Dropout after the MLP block.
        start_drop_path_rate: Stochastic-depth rate at the first layer.
        end_drop_path_rate: Stochastic-depth rate at the last layer.
        layer_norm_first: Pre-norm (True) vs post-norm (False) layout.
        norm_eps: Epsilon for layer normalization.
        norm_affine: Whether norm layers have learnable affine params.
        fixed_positions: Use fixed (non-learned) positional embeddings.
        img_size: Input spectrogram size as (target_length, mel_bins).
        **kwargs: Forwarded to PretrainedConfig.
    """

    model_type = "eat"

    def __init__(
        self,
        embed_dim=768,
        depth=12,
        num_heads=12,
        patch_size=16,
        stride=16,
        in_chans=1,
        mel_bins=128,
        max_length=768,
        num_classes=527,
        model_variant="pretrain", # or "finetune"
        mlp_ratio=4.0,
        qkv_bias=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        activation_dropout=0.0,
        post_mlp_drop=0.0,
        start_drop_path_rate=0.0,
        end_drop_path_rate=0.0,
        layer_norm_first=False,
        norm_eps=1e-6,
        norm_affine=True,
        fixed_positions=True,
        img_size=(1024, 128), # (target_length, mel_bins)
        **kwargs,
    ):
        super().__init__(**kwargs)

        # --- Architecture ---
        self.embed_dim = embed_dim
        self.depth = depth
        self.num_heads = num_heads
        self.mlp_ratio = mlp_ratio
        self.qkv_bias = qkv_bias
        self.model_variant = model_variant

        # --- Input / patching ---
        self.patch_size = patch_size
        self.stride = stride
        self.in_chans = in_chans
        self.mel_bins = mel_bins
        self.max_length = max_length
        self.img_size = img_size
        self.fixed_positions = fixed_positions

        # --- Head ---
        self.num_classes = num_classes

        # --- Regularization ---
        self.drop_rate = drop_rate
        self.attn_drop_rate = attn_drop_rate
        self.activation_dropout = activation_dropout
        self.post_mlp_drop = post_mlp_drop
        self.start_drop_path_rate = start_drop_path_rate
        self.end_drop_path_rate = end_drop_path_rate

        # --- Normalization ---
        self.layer_norm_first = layer_norm_first
        self.norm_eps = norm_eps
        self.norm_affine = norm_affine