File size: 1,978 Bytes

0c1d6f8

# coding=utf-8
# Copyright 2026 NAVER Cloud Corp. and the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""HyperCLOVAX-SEED Audio Encoder configuration."""

from transformers import AutoConfig, PretrainedConfig


class HyperCLOVAXSeedAudioEncoderConfig(PretrainedConfig):
    """Configuration for HyperCLOVAXSeedAudioEncoder wrapped as PreTrainedModel.

    The constructor only stores the given hyperparameters on the instance;
    how each value is interpreted is defined by the encoder model that
    consumes this config, which is not part of this file.

    Args:
        d_model: Stored as ``d_model``; also readable and writable through
            the ``hidden_size`` alias. Defaults to 768.
        encoder_layers: Stored as ``encoder_layers``. Defaults to 12.
        encoder_attention_heads: Stored as ``encoder_attention_heads``.
            Defaults to 12.
        encoder_ffn_dim: Stored as ``encoder_ffn_dim``. Defaults to 3072.
        num_mel_bins: Stored as ``num_mel_bins``. Defaults to 128.
        max_source_positions: Stored as ``max_source_positions``.
            Defaults to 1500.
        dropout: Stored as ``dropout``. Defaults to 0.1.
        attention_dropout: Stored as ``attention_dropout``. Defaults to 0.1.
        pool_kernel_size: Stored as ``pool_kernel_size``. Defaults to 5.
        pool_stride: Stored as ``pool_stride``. Defaults to 5.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "hyperclovax_seed_audio_encoder"

    # `hidden_size` is an alias of `d_model`, kept for compatibility with code
    # that expects the generic attribute name. Declaring it in `attribute_map`
    # (instead of storing a second copy) keeps both names backed by a single
    # value, so updating one can never leave the other stale.
    attribute_map = {"hidden_size": "d_model"}

    def __init__(
        self,
        d_model: int = 768,
        encoder_layers: int = 12,
        encoder_attention_heads: int = 12,
        encoder_ffn_dim: int = 3072,
        num_mel_bins: int = 128,
        max_source_positions: int = 1500,
        dropout: float = 0.1,
        attention_dropout: float = 0.1,
        pool_kernel_size: int = 5,
        pool_stride: int = 5,
        **kwargs,
    ) -> None:
        # The base initializer runs first on purpose: a `hidden_size` entry
        # arriving through kwargs (e.g. from a previously saved config.json)
        # is expected to be routed to `d_model` by `attribute_map`, and the
        # explicit `d_model` assignment below then takes precedence, as it
        # did when `hidden_size` was a plain copy.
        # NOTE(review): relies on PretrainedConfig applying extra kwargs via
        # setattr — confirm against the pinned transformers version.
        super().__init__(**kwargs)
        self.d_model = d_model
        self.encoder_layers = encoder_layers
        self.encoder_attention_heads = encoder_attention_heads
        self.encoder_ffn_dim = encoder_ffn_dim
        self.num_mel_bins = num_mel_bins
        self.max_source_positions = max_source_positions
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.pool_kernel_size = pool_kernel_size
        self.pool_stride = pool_stride


# Names exported by `from <module> import *`.
__all__ = ["HyperCLOVAXSeedAudioEncoderConfig"]

# Import-time side effect: register the config class with AutoConfig under
# its model-type string (the same value as the class's `model_type`).
AutoConfig.register(
    "hyperclovax_seed_audio_encoder",
    HyperCLOVAXSeedAudioEncoderConfig,
)