{
  "architectures": [
    "OpticalFlowPerceiver"
  ],
  "model_config": {
    "activation_checkpointing": false,
    "activation_offloading": false,
    "decoder": {
      "cross_attention_residual": false,
      "cross_attention_widening_factor": 1,
      "dropout": 0.1,
      "freeze": false,
      "image_shape": [
        368,
        496
      ],
      "init_scale": 0.02,
      "num_cross_attention_heads": 1,
      "num_cross_attention_qk_channels": 512,
      "num_cross_attention_v_channels": 512,
      "rescale_factor": 100.0
    },
    "encoder": {
      "cross_attention_widening_factor": 1,
      "dropout": 0.1,
      "first_cross_attention_layer_shared": false,
      "first_self_attention_block_shared": true,
      "freeze": false,
      "image_shape": [
        368,
        496
      ],
      "init_scale": 0.02,
      "num_cross_attention_heads": 1,
      "num_cross_attention_layers": 1,
      "num_cross_attention_qk_channels": null,
      "num_cross_attention_v_channels": null,
      "num_frequency_bands": 64,
      "num_patch_hidden_channels": 64,
      "num_patch_input_channels": 27,
      "num_self_attention_blocks": 1,
      "num_self_attention_heads": 16,
      "num_self_attention_layers_per_block": 24,
      "num_self_attention_qk_channels": null,
      "num_self_attention_v_channels": null,
      "self_attention_widening_factor": 1
    },
    "num_latent_channels": 512,
    "num_latents": 2048
  },
  "model_type": "perceiver-io-optical-flow",
  "torch_dtype": "float32",
  "transformers_version": "4.28.0"
}