{
  "context_length": 328,
  "emb_dim": 768,
  "embed_dim": 512,
  "fpn_in": [
    512,
    768,
    768
  ],
  "fpn_out": [
    768,
    768,
    768,
    512
  ],
  "image_resolution": 224,
  "output_dim": 512,
  "patch_size": 32,
  "ratio": 0.9,
  "transformer_heads": 8,
  "transformer_layers": 12,
  "transformer_width": 512,
  "txt_length": 328,
  "vision_layers": 12,
  "vision_patch_size": 32,
  "vision_width": 768,
  "vocab_size": 49408
}