Ming-UniVision-16B-A3B / preprocessor_config.json
zyhuangnus's picture
Upload folder using huggingface_hub
c46ddda verified
{
"auto_map": {
"AutoImageProcessor": "image_processing_bailingmm.BailingMMImageProcessor",
"AutoFeatureExtractor": "audio_processing_bailingmm.BailingMMAudioProcessor",
"AutoProcessor": "processing_bailingmm.BailingMMProcessor"
},
"image_grid_thws": [1, 32, 32],
"min_pixels": 78400,
"max_pixels": 2007040,
"patch_size": 14,
"temporal_patch_size": 2,
"merge_size": 2,
"image_mean": [
0.48145466,
0.4578275,
0.40821073
],
"image_std": [
0.26862954,
0.26130258,
0.27577711
],
"image_token": "<image>",
"video_token": "<video>",
"image_processor_type": "BailingMMImageProcessor",
"audio_processor_type": "BailingMMAudioProcessor",
"audio_token": "<audio>",
"chunk_length": 30,
"dither": 0.0,
"feature_size": 80,
"frame_length": 25,
"frame_shift": 10,
"inverse_norm": true,
"lfr_m": 7,
"lfr_n": 6,
"n_samples": 480000,
"num_audio_tokens": 256,
"sampling_rate": 16000,
"up_sample": true,
"return_attention_mask": true,
"padding_side": "right",
"padding_value": 0.0,
"processor_class": "BailingMMProcessor",
"wav_frontend_args": {
"cmvn_file": "am.mvn",
"fs": 16000,
"window": "hamming",
"n_mels": 80,
"frame_length": 25,
"frame_shift": 10,
"lfr_m": 7,
"lfr_n": 6
},
"whisper_frontend_args": {
"n_mels": 128
}
}