{
  "feature_extractor": {
    "chunk_length": 300,
    "dither": 0.0,
    "feature_extractor_type": "WhisperFeatureExtractor",
    "feature_size": 128,
    "hop_length": 160,
    "image_mean": [
      0.48145466,
      0.4578275,
      0.40821073
    ],
    "image_processor_type": "Qwen2VLImageProcessor",
    "image_std": [
      0.26862954,
      0.26130258,
      0.27577711
    ],
    "max_pixels": 12845056,
    "merge_size": 2,
    "min_pixels": 3136,
    "n_fft": 400,
    "n_samples": 4800000,
    "nb_max_frames": 30000,
    "padding_side": "right",
    "padding_value": 0.0,
    "patch_size": 14,
    "return_attention_mask": true,
    "sampling_rate": 16000,
    "temporal_patch_size": 2
  },
  "image_processor": {
    "chunk_length": 300,
    "data_format": "channels_first",
    "dither": 0.0,
    "do_convert_rgb": true,
    "do_normalize": true,
    "do_rescale": true,
    "do_resize": true,
    "feature_size": 128,
    "hop_length": 160,
    "image_mean": [
      0.48145466,
      0.4578275,
      0.40821073
    ],
    "image_processor_type": "Qwen2VLImageProcessorFast",
    "image_std": [
      0.26862954,
      0.26130258,
      0.27577711
    ],
    "merge_size": 2,
    "n_fft": 400,
    "n_samples": 4800000,
    "nb_max_frames": 30000,
    "padding_side": "right",
    "padding_value": 0.0,
    "patch_size": 14,
    "resample": 3,
    "rescale_factor": 0.00392156862745098,
    "return_attention_mask": true,
    "sampling_rate": 16000,
    "size": {
      "longest_edge": 12845056,
      "shortest_edge": 3136
    },
    "temporal_patch_size": 2
  },
  "processor_class": "Qwen2_5OmniProcessor",
  "video_processor": {
    "chunk_length": 300,
    "data_format": "channels_first",
    "default_to_square": true,
    "dither": 0.0,
    "do_convert_rgb": true,
    "do_normalize": true,
    "do_rescale": true,
    "do_resize": true,
    "do_sample_frames": false,
    "feature_extractor_type": "WhisperFeatureExtractor",
    "feature_size": 128,
    "hop_length": 160,
    "image_mean": [
      0.48145466,
      0.4578275,
      0.40821073
    ],
    "image_processor_type": "Qwen2VLImageProcessor",
    "image_std": [
      0.26862954,
      0.26130258,
      0.27577711
    ],
    "max_frames": 768,
    "merge_size": 2,
    "min_frames": 4,
    "n_fft": 400,
    "n_samples": 4800000,
    "nb_max_frames": 30000,
    "padding_side": "right",
    "padding_value": 0.0,
    "patch_size": 14,
    "resample": 3,
    "rescale_factor": 0.00392156862745098,
    "return_attention_mask": true,
    "return_metadata": false,
    "sampling_rate": 16000,
    "size": {
      "longest_edge": 12845056,
      "shortest_edge": 3136
    },
    "temporal_patch_size": 2,
    "video_processor_type": "Qwen2VLVideoProcessor"
  }
}