MOSS-Audio-4B-Thinking / configuration_moss_audio.py
kiiic's picture
Upload folder using huggingface_hub
3dfd141 verified
from transformers import PretrainedConfig, Qwen3Config
class MossAudioConfig(PretrainedConfig):
    """Configuration for the ``moss_audio`` model type.

    Bundles an audio sub-configuration and a Qwen3 language sub-configuration
    together with a handful of model-level settings.

    Args:
        audio_config: Audio sub-configuration. Stored exactly as passed; no
            conversion or validation is performed here.
        language_config: Language-model sub-configuration. A ``dict`` is
            expanded into ``Qwen3Config(**language_config)``, ``None``
            becomes a default ``Qwen3Config()``, and any other value is
            stored as given.
        adapter_hidden_size: Stored as-is. Presumably the hidden width of
            the audio-to-language adapter -- confirm against the modeling
            code.
        ignore_index: Stored as-is. Presumably the label id skipped by the
            loss -- confirm against the modeling code.
        deepstack_num_inject_layers: Stored as-is; its meaning is defined by
            whatever consumes this config.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``. If
            ``num_hidden_layers``, ``eos_token_id``, ``bos_token_id`` or
            ``vocab_size`` is not supplied, it is filled in from the
            language config (``None`` when the language config lacks it).
    """

    model_type = "moss_audio"
    is_composition = True

    def __init__(
        self,
        audio_config=None,
        language_config=None,
        adapter_hidden_size=8192,
        ignore_index=-100,
        deepstack_num_inject_layers=None,
        **kwargs,
    ):
        # Normalise the language config: default instance when absent,
        # Qwen3Config built from the mapping when given as a plain dict.
        if language_config is None:
            language_config = Qwen3Config()
        elif isinstance(language_config, dict):
            language_config = Qwen3Config(**language_config)

        self.audio_config = audio_config
        self.language_config = language_config
        self.adapter_hidden_size = adapter_hidden_size
        self.ignore_index = ignore_index
        self.deepstack_num_inject_layers = deepstack_num_inject_layers

        # Mirror selected language-model fields onto the top-level config,
        # but never override a value the caller passed explicitly.
        mirrored = ("num_hidden_layers", "eos_token_id", "bos_token_id", "vocab_size")
        fallbacks = {name: getattr(language_config, name, None) for name in mirrored}
        for name, value in fallbacks.items():
            if name not in kwargs:
                kwargs[name] = value

        super().__init__(**kwargs)