{
  "auto_map": {
    "AutoProcessor": "processing_nvila.NVILAProcessor"
  },
  "do_convert_rgb": null,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_processor_type": "SiglipImageProcessor",
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "processor_class": "NVILAProcessor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 392,
    "width": 392
  },
  "autogaze_model_id": "bfshi/AutoGaze",
  "gazing_ratio_tile": 0.75,
  "gazing_ratio_thumbnail": 0.75,
  "task_loss_requirement_tile": 0.7,
  "task_loss_requirement_thumbnail": 0.7,
  "target_scales": [56, 112, 196, 392],
  "target_patch_size": 16,
  "num_video_frames": 8,
  "max_tiles_video": 8,
  "num_video_frames_thumbnail": 8,
  "mm_projector_shuffle_num": 9,
  "max_batch_size_autogaze": 32
}