{
  "architectures": [
    "PerceiverMaskedLanguageModel"
  ],
  "model_config": {
    "activation_checkpointing": false,
    "activation_offloading": false,
    "decoder": {
      "cross_attention_residual": false,
      "cross_attention_widening_factor": 1,
      "dropout": 0.1,
      "freeze": false,
      "init_scale": 0.02,
      "max_seq_len": 2048,
      "num_cross_attention_heads": 8,
      "num_cross_attention_qk_channels": 256,
      "num_cross_attention_v_channels": 768,
      "num_output_query_channels": null,
      "vocab_size": 262
    },
    "encoder": {
      "cross_attention_widening_factor": 1,
      "dropout": 0.1,
      "first_cross_attention_layer_shared": false,
      "first_self_attention_block_shared": true,
      "freeze": false,
      "init_scale": 0.02,
      "max_seq_len": 2048,
      "num_cross_attention_heads": 8,
      "num_cross_attention_layers": 1,
      "num_cross_attention_qk_channels": 256,
      "num_cross_attention_v_channels": 1280,
      "num_input_channels": 768,
      "num_self_attention_blocks": 1,
      "num_self_attention_heads": 8,
      "num_self_attention_layers_per_block": 26,
      "num_self_attention_qk_channels": 256,
      "num_self_attention_v_channels": 1280,
      "params": null,
      "self_attention_widening_factor": 1,
      "vocab_size": 262
    },
    "num_latent_channels": 1280,
    "num_latents": 256
  },
  "model_type": "perceiver-io-masked-language-model",
  "tokenizer_class": "PerceiverTokenizer",
  "torch_dtype": "float32",
  "transformers_version": "4.28.0"
}