| { | |
| "_name_or_path": "outputs/bak0416/lucy_deepseek_adaptive_s2p2/checkpoint-7200", | |
| "additional_tokens": { | |
| "ANS_A": 4099, | |
| "ANS_T": 102403, | |
| "AQA": 4101, | |
| "AQAA": 4102, | |
| "ASR": 4100, | |
| "BOA": 4098, | |
| "BOT": 102402, | |
| "EOA": 4096, | |
| "EOT": 102400, | |
| "ER": 4105, | |
| "F10": 4104, | |
| "M29": 4103, | |
| "PAD_A": 4097, | |
| "PAD_T": 102401, | |
| "TQA": 102405, | |
| "TQAA": 102406, | |
| "TTS": 102404 | |
| }, | |
| "architectures": [ | |
| "DeepseekV2ForCausalLM" | |
| ], | |
| "attention_bias": false, | |
| "attention_dropout": 0.0, | |
| "audio_additional_tokens": { | |
| "ANS_A": 4099, | |
| "AQA": 4101, | |
| "AQAA": 4102, | |
| "ASR": 4100, | |
| "BOA": 4098, | |
| "EOA": 4096, | |
| "ER": 4105, | |
| "F10": 4104, | |
| "M29": 4103, | |
| "PAD_A": 4097 | |
| }, | |
| "audio_experts_file": "/home/tione/notebook/alanhshao/LUCY/generated/outputs-vita_deepseek-chat_s3-test/audio_experts.json", | |
| "audio_num_experts": 6, | |
| "audio_special_tokens": 64, | |
| "audio_vocab_size": 4096, | |
| "audio_vocab_size_padded": 4160, | |
| "auto_map": { | |
| "AutoConfig": "configuration_deepseek.DeepseekV2Config", | |
| "AutoModel": "modeling_deepseek.DeepseekV2Model", | |
| "AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM" | |
| }, | |
| "aux_loss_alpha": 0.001, | |
| "bos_token_id": 100000, | |
| "cache_dir": null, | |
| "eos_token_id": 100001, | |
| "ep_size": 1, | |
| "first_k_dense_replace": 1, | |
| "freeze_audio_experts": false, | |
| "freeze_text_experts": false, | |
| "hidden_act": "silu", | |
| "hidden_size": 2048, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 10944, | |
| "kv_lora_rank": 512, | |
| "loss_reduction": "mean", | |
| "loss_weights": [ | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0 | |
| ], | |
| "max_position_embeddings": 163840, | |
| "mm_audio_encoder": "/home/tione/notebook/alanhshao/pretrained_models/whisper-medium", | |
| "mm_audio_encoder_hidden_size": 1024, | |
| "mm_audio_encoder_type": "whisper", | |
| "mm_audio_num_codebook": 7, | |
| "mm_audio_projector_hidden_size": 7168, | |
| "mm_audio_projector_type": "linear", | |
| "model_type": "deepseek_v2", | |
| "moe_intermediate_size": 1408, | |
| "moe_layer_freq": 1, | |
| "n_group": 1, | |
| "n_routed_experts": 64, | |
| "n_shared_experts": 2, | |
| "norm_topk_prob": false, | |
| "num_attention_heads": 16, | |
| "num_experts_per_tok": 6, | |
| "num_hidden_layers": 27, | |
| "num_key_value_heads": 16, | |
| "post_tts_adapter": false, | |
| "pretraining_tp": 1, | |
| "q_lora_rank": null, | |
| "qk_nope_head_dim": 128, | |
| "qk_rope_head_dim": 64, | |
| "rms_norm_eps": 1e-06, | |
| "rope_scaling": { | |
| "beta_fast": 32, | |
| "beta_slow": 1, | |
| "factor": 40, | |
| "mscale": 0.707, | |
| "mscale_all_dim": 0.707, | |
| "original_max_position_embeddings": 4096, | |
| "type": "yarn" | |
| }, | |
| "rope_theta": 10000, | |
| "routed_scaling_factor": 1.0, | |
| "scoring_func": "softmax", | |
| "seq_aux": true, | |
| "text_additional_tokens": { | |
| "ANS_T": 102403, | |
| "BOT": 102402, | |
| "EOT": 102400, | |
| "PAD_T": 102401, | |
| "TQA": 102405, | |
| "TQAA": 102406, | |
| "TTS": 102404 | |
| }, | |
| "text_special_tokens": 64, | |
| "text_vocab_size": 102400, | |
| "text_vocab_size_padded": 102464, | |
| "tie_word_embeddings": false, | |
| "tokenizer_model_max_length": 32768, | |
| "tokenizer_padding_side": "right", | |
| "topk_group": 1, | |
| "topk_method": "greedy", | |
| "torch_dtype": "bfloat16", | |
| "total_vocab_size": 131584, | |
| "transformers_version": "4.45.0", | |
| "tune_text_embed": true, | |
| "use_cache": false, | |
| "v_head_dim": 128, | |
| "vocab_size": 131584 | |
| } | |