{
  "vocab_size": 50258,
  "emb_dim": 768,
  "context_length": 256,
  "n_heads": 12,
  "n_layers": 12,
  "drop_rate": 0.1,
  "qkv_bias": true,
  "cross_attention_pos": [
    3,
    6,
    9
  ],
  "vision_enabled": true,
  "vision_encoder_type": "torchvision_vit_b_16",
  "vision_pretrained": true,
  "vision_freeze": true,
  "perceiver_num_latents": 64,
  "perceiver_depth": 2,
  "perceiver_heads": 8,
  "perceiver_dim_head": 64,
  "weight_decay": 0.01,
  "learning_rate": 0.0001
}