LongCat-Flash-Omni/vision/config.json
aiqtech's picture
Duplicate from meituan-longcat/LongCat-Flash-Omni
714cf11 verified
{
"resolution_mode": "native",
"init_method": "xavier",
"pe_type": "rope2d",
"rope_theta": 10000,
"patch_size": 14,
"temporal_patch_size": 2,
"spatial_merge_size": 1,
"num_hidden_layers": 32,
"num_attention_heads": 16,
"hidden_size": 1280,
"intermediate_size": 5184,
"patch_embedding_bias": true,
"qk_normalization": true,
"qkv_bias": false,
"use_pre_norm": false,
"use_flash_attn": true,
"norm_type": "RMSNorm",
"layer_norm_eps": 1e-06,
"hidden_act": "SwiGLU",
"patch_dropout": 0.5,
"attention_dropout": 0.0,
"drop_path_rate": 0.0,
"initializer_range": 1e-10,
"initializer_factor": 0.1,
"image_size": 1792,
"min_tokens": 576,
"max_tokens": 5832,
"min_tokens_video": 512,
"max_tokens_video": 28672,
"min_tokens_video_vit": 576,
"max_tokens_video_vit": 156800,
"max_frames": 128,
"image_mean": [0.485, 0.456, 0.406],
"image_std": [0.229, 0.224, 0.225],
"resize_factor": 8,
"relarge_ratio": 1.0,
"adaptor_dim": 7168
}