INC4AI's picture
Upload model
1a740f1 verified
Raw
History Blame Contribute Delete
31 kB
{
"architectures": [
"LongcatNextForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"audio_config": {
"activation_dropout": 0.0,
"activation_function": "gelu",
"apply_spec_augment": false,
"attention_dropout": 0.0,
"audio_delim_token_id": 131116,
"audio_end_token_id": 131104,
"audio_head_transformer_dims": 3072,
"audio_head_transformer_ffn_scale": 16,
"audio_head_transformer_layers": 4,
"audio_pad_token_id": 131105,
"audio_start_token_id": 131103,
"audiogen_end_token_id": 131124,
"audiogen_start_token_id": 131123,
"audiotext_end_token_id": 131121,
"audiotext_pad_token_id": 131122,
"audiotext_start_token_id": 131120,
"avg_pooler": 4,
"classifier_proj_size": 256,
"cosy24kvocoder_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dtype": null,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false,
"weight_path": "WEIGHT_PATH_TO_LONGCAT_NEXT/cosy24k_vocoder/hift.pt"
},
"d_model": 1280,
"decoder_attention_heads": 20,
"decoder_ffn_dim": 5120,
"decoder_kernel_size": 3,
"decoder_layerdrop": 0.0,
"decoder_layers": 8,
"decoder_stride_size": 2,
"dropout": 0.0,
"encoder_attention_heads": 20,
"encoder_ffn_dim": 5120,
"encoder_layerdrop": 0.0,
"encoder_layers": 32,
"flow_matching_config": {
"_name_or_path": "",
"act_fn": "gelu",
"add_cross_attention": false,
"architectures": null,
"attention_head_dim": 64,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"cal_mel_mae": true,
"cfm_params": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dtype": null,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"inference_cfg_rate": 0.7,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"sigma_min": 1e-06,
"solver": "euler",
"suppress_tokens": null,
"t_scheduler": "cosine",
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"training_cfg_rate": 0.2,
"typical_p": 1.0,
"use_bfloat16": false
},
"channels": [
256
],
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diffusion_steps": 10,
"diversity_penalty": 0.0,
"do_sample": false,
"dropout": 0.0,
"dtype": null,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"in_channels": 80,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "",
"n_blocks": 4,
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_heads": 8,
"num_mid_blocks": 12,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"prenet_activation_function": "gelu",
"prenet_attention_heads": 8,
"prenet_d_model": 512,
"prenet_ffn_dim": 2048,
"prenet_in_dim": 1280,
"prenet_max_source_positions": 5000,
"prenet_nlayers": 12,
"prenet_out_dim": 80,
"prenet_target_mel_length_scale_ratio": 1.0,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"spk_emb_dim": 0,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false,
"use_hidden_states_before_dconv2": true
},
"hop_length": 160,
"init_std": 0.02,
"kernel_size": 3,
"mask_feature_length": 10,
"mask_feature_min_masks": 0,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_min_masks": 2,
"mask_time_prob": 0.05,
"max_audio_seconds": 30,
"max_source_positions": 1500,
"max_target_positions": 448,
"median_filter_width": 7,
"model_type": "longcat_next_audio",
"n_fft": 400,
"num_hidden_layers": 32,
"num_mel_bins": 128,
"sampling_rate": 16000,
"scale_embedding": false,
"stride_size": 2,
"use_cache": true,
"use_weighted_layer_sum": false,
"vocab_size": 51865,
"vocoder_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"channels": [
256,
256,
256,
256,
256
],
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dtype": null,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hop_length": 256,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_mel_bins": 80,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sampling_rate": 16000,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false
},
"vq_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"codebook_sizes": [
8192,
4096,
2048,
1024,
1024,
1024,
1024,
1024
],
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dtype": null,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false
}
},
"audio_offset": 131125,
"auto_map": {
"AutoConfig": "configuration_longcat_next.LongcatNextConfig",
"AutoModel": "modeling_longcat_next.LongcatNextModel",
"AutoModelForCausalLM": "modeling_longcat_next.LongcatNextForCausalLM"
},
"bos_token_id": 1,
"dtype": "bfloat16",
"emb_neighbor_num": 4,
"emb_split_num": 4,
"eos_token_id": 2,
"expert_ffn_hidden_size": 1024,
"ffn_hidden_size": 6144,
"head_dim": 64,
"hidden_act": "silu",
"hidden_size": 3072,
"initializer_range": 0.02,
"kv_lora_rank": 512,
"max_position_embeddings": 131072,
"mla_scale_kv_lora": true,
"mla_scale_q_lora": true,
"model_type": "longcat_next",
"moe_topk": 12,
"n_routed_experts": 256,
"ngram_vocab_size_ratio": 78,
"num_attention_heads": 32,
"num_hidden_layers": 28,
"num_key_value_heads": 32,
"num_layers": 14,
"oe_ignored_token_ids": [
131072,
131073,
131074,
131075,
131076,
131077,
131078,
131079,
131080,
131081,
131082,
131083,
131084,
131085,
131086,
131087,
131088,
131089,
131090,
131091,
131092,
131093,
131094,
131095,
131096,
131097,
131098,
131099,
131100,
131101,
131102,
131103,
131104,
131105,
131106,
131107,
131108,
131109,
131110,
131111,
131112,
131113,
131114,
131115,
131116,
131117,
131118,
131119,
131120,
131121,
131122,
131123,
131124
],
"q_lora_rank": 1536,
"qk_head_dim": 192,
"qk_nope_head_dim": 128,
"qk_rope_head_dim": 64,
"quantization_config": {
"autoround_version": "0.13.0",
"batch_size": 1,
"bits": 4,
"block_name_to_quantize": "model.layers",
"data_type": "int",
"extra_config": {
".*classifier.*": {
"bits": 16,
"data_type": "float"
},
"model.layers.0.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.1.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.10.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.11.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.12.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.13.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.2.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.3.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.4.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.5.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.6.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.7.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.8.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
},
"model.layers.9.mlp.router.classifier": {
"bits": 16,
"data_type": "float"
}
},
"gradient_accumulate_steps": 8,
"group_size": 128,
"packing_format": "auto_round:auto_gptq",
"quant_method": "auto-round",
"seqlen": 512,
"sym": true
},
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000000,
"routed_scaling_factor": 6.0,
"text_vocab_plus_multimodal_special_token_size": 131125,
"text_vocab_size": 131072,
"tie_word_embeddings": false,
"transformers_version": "4.57.6",
"use_cache": true,
"v_head_dim": 128,
"visual_config": {
"depth": 32,
"fullatt_block_indexes": [
7,
15,
23,
31
],
"hidden_act": "silu",
"hidden_size": 1280,
"image_end_token_id": 131107,
"image_head_transformer_dims": 2048,
"image_head_transformer_ffn_scale": 16,
"image_head_transformer_layers": 4,
"image_newline_token_id": 131109,
"image_pad_token_id": 131108,
"image_start_token_id": 131106,
"in_channels": 3,
"initializer_range": 0.02,
"intermediate_size": 3420,
"model_type": "longcat_next_visual",
"num_heads": 16,
"out_hidden_size": 3584,
"patch_size": 14,
"spatial_merge_size": 2,
"temporal_patch_size": 2,
"tokens_per_second": 4,
"visual_decoder_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"codebook_dim": 3584,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dtype": null,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"image_decoder_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"attention_dropout": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"codebook_dim": 3584,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"distill_taps": [
3,
7,
15,
23
],
"diversity_penalty": 0.0,
"do_sample": false,
"dtype": null,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "gelu",
"hidden_size": 1024,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"intermediate_size": 2730,
"is_decoder": false,
"is_encoder_decoder": false,
"k_bias": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"layer_norm_eps": 1e-06,
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "",
"no_repeat_ngram_size": 0,
"num_attention_heads": 16,
"num_beam_groups": 1,
"num_beams": 1,
"num_hidden_layers": 32,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"patch_size": 14,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"q_bias": true,
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"spatial_merge_size": 2,
"subln": true,
"suppress_tokens": null,
"swiglu": true,
"task_specific_params": null,
"teacher_dims": {
"15": 1280,
"23": 1280,
"3": 1280,
"7": 1280
},
"temperature": 1.0,
"temporal_patch_size": 2,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false,
"v_bias": true
},
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"scheduler_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dtype": null,
"dynamic_time_shift": true,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_return_sequences": 1,
"num_train_timesteps": 1000,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false
},
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"transformer_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"axes_dim_rope": [
40,
40,
40
],
"axes_lens": [
10000,
10000,
10000
],
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dtype": null,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_size": 2520,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"in_channels": 16,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "",
"multiple_of": 256,
"no_repeat_ngram_size": 0,
"norm_eps": 1e-05,
"num_attention_heads": 21,
"num_beam_groups": 1,
"num_beams": 1,
"num_kv_heads": 7,
"num_layers": 32,
"num_refiner_layers": 2,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"patch_size": 2,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"text_feat_dim": 2048,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"timestep_scale": 1000.0,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false
},
"typical_p": 1.0,
"use_bfloat16": false,
"vae_config": {
"_name_or_path": "",
"act_fn": "silu",
"add_cross_attention": false,
"architectures": null,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"block_out_channels": [
128,
256,
512,
512
],
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"down_block_types": [
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
"dtype": null,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"force_upcast": true,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"in_channels": 3,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"latent_channels": 16,
"layers_per_block": 2,
"length_penalty": 1.0,
"max_length": 20,
"mid_block_add_attention": true,
"min_length": 0,
"model_type": "",
"no_repeat_ngram_size": 0,
"norm_num_groups": 32,
"num_beam_groups": 1,
"num_beams": 1,
"num_return_sequences": 1,
"out_channels": 3,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sample_size": 1024,
"scaling_factor": 0.3611,
"sep_token_id": null,
"shift_factor": 0.1159,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"typical_p": 1.0,
"up_block_types": [
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D"
],
"use_bfloat16": false,
"use_post_quant_conv": false,
"use_quant_conv": false
},
"weight_path": "WEIGHT_PATH_TO_LONGCAT_NEXT/image_decoder/image_decoder.safetensors"
},
"vq_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"codebook_dim": 3584,
"codebook_size": 16384,
"codebook_sizes": [
16384,
16384,
16384,
16384,
16384,
16384,
16384,
16384
],
"commit_loss_ratio": 0.25,
"cross_attention_hidden_size": null,
"decay": 0.99,
"decoder_start_token_id": null,
"depth": 8,
"diversity_penalty": 0.0,
"do_sample": false,
"dtype": null,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"entropy_loss_ratio": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"in_channels": 3584,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"quant_conv": true,
"quantizer_type": "rq",
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"restart_unused_codes": true,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"shared_codebook": true,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torchscript": false,
"typical_p": 1.0,
"use_bfloat16": false,
"vq_loss_ratio": 0
},
"window_size": 112
},
"visual_embedding_layer_hidden_act": "silu",
"visual_embedding_layer_intermediate_size": 8192,
"visual_offset": 150581,
"vocab_size": 282624,
"zero_expert_num": 128,
"zero_expert_type": "identity"
}