from transformers import PretrainedConfig, AutoConfig


class SymphonySpeechEncoderConfig(PretrainedConfig):
    """Configuration for the Symphony speech encoder."""

    model_type = "symphony_speech_encoder"

    def __init__(
        self,
        n_mels=128,
        n_ctx=1500,
        n_state=1280,
        n_head=20,
        n_layer=32,
        stage_tokens=None,
        compression_size=50,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.n_mels = n_mels
        self.n_ctx = n_ctx
        self.n_state = n_state
        self.n_head = n_head
        self.n_layer = n_layer
        # Default via None rather than a mutable list in the signature, so
        # instances never share (and mutate) the same default object.
        self.stage_tokens = stage_tokens if stage_tokens is not None else [80, 80, 80]
        self.compression_size = compression_size


class SymphonyConfig(PretrainedConfig):
    """Composite configuration tying the speech encoder and the LoRA-adapted LLM together."""

    model_type = "symphony"

    def __init__(
        self,
        encoder_config=None,
        llm_config=None,
        lora_r=16,
        lora_a=64,
        llm_modules=None,
        low_resource=False,
        **kwargs
    ):
        # Default LoRA target modules: all attention and MLP projections.
        if llm_modules is None:
            llm_modules = [
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
            ]

        # Resolve the LLM config. A dict arrives here when a serialized
        # SymphonyConfig is reloaded and the nested config must be rebuilt.
        if llm_config is None:
            llm_config = AutoConfig.from_pretrained("Qwen/Qwen3-4B")
        elif isinstance(llm_config, dict):
            # `_name_or_path` is usually present but empty for configs that
            # were never tied to a checkpoint, so test its value, not the key.
            if llm_config.get("_name_or_path"):
                llm_config = AutoConfig.from_pretrained(llm_config["_name_or_path"], **llm_config)
            else:
                # AutoConfig has no `from_dict`; dispatch on the stored
                # `model_type` via `AutoConfig.for_model` instead.
                llm_config = AutoConfig.for_model(llm_config.pop("model_type"), **llm_config)

        # Resolve the speech-encoder config the same way.
        if encoder_config is None:
            encoder_config = SymphonySpeechEncoderConfig()
        elif isinstance(encoder_config, dict):
            encoder_config = SymphonySpeechEncoderConfig(**encoder_config)

        self.llm_config = llm_config
        self.encoder_config = encoder_config
        self.lora_r = lora_r
        self.lora_a = lora_a
        self.llm_modules = llm_modules
        self.low_resource = low_resource

        super().__init__(**kwargs)
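

# --- Minimal usage sketch, not part of the model code. Assumptions: a recent
# `transformers` release that ships the Qwen3 model type, Hub access for the
# default Qwen/Qwen3-4B config, and an illustrative output directory name.
# Registering both configs lets AutoConfig dispatch on their `model_type`
# when reloading a saved config.json.
if __name__ == "__main__":
    AutoConfig.register("symphony_speech_encoder", SymphonySpeechEncoderConfig)
    AutoConfig.register("symphony", SymphonyConfig)

    config = SymphonyConfig()  # defaults pull the Qwen3-4B config from the Hub
    config.save_pretrained("symphony_config_demo")  # nested configs saved as dicts

    # The dict branches in the two __init__ methods rebuild the sub-configs.
    reloaded = AutoConfig.from_pretrained("symphony_config_demo")
    assert isinstance(reloaded, SymphonyConfig)
    assert reloaded.encoder_config.stage_tokens == [80, 80, 80]
    print(type(reloaded.llm_config).__name__, reloaded.lora_r)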