{
  "architectures": ["BitMarModel"],
  "auto_map": {
    "AutoConfig": "modeling_bitmar.BitMarConfig",
    "AutoModel": "modeling_bitmar.BitMarModel"
  },
  "model_type": "bitmar",
  "vocab_size": 50257,
  "text_encoder_dim": 128,
  "text_encoder_layers": 4,
  "text_encoder_heads": 4,
  "text_decoder_dim": 128,
  "text_decoder_layers": 4,
  "text_decoder_heads": 4,
  "vision_encoder_dim": 768,
  "vision_latent_size": 128,
  "vision_hidden_size": 64,
  "vision_compression_method": "learned_compression",
  "vision_spatial_pooling": true,
  "vision_pool_size": 2,
  "fusion_hidden_size": 128,
  "fusion_num_heads": 4,
  "fusion_num_layers": 2,
  "memory_size": 32,
  "episode_dim": 128,
  "memory_alpha": 0.2,
  "direct_writing": true,
  "memory_compression": true,
  "max_seq_len": 256,
  "dropout": 0.15,
  "torch_dtype": "float32",
  "transformers_version": "4.36.0",
  "use_cache": true,
  "tie_word_embeddings": true,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-5,
  "pad_token_id": 50256,
  "bos_token_id": 50256,
  "eos_token_id": 50256,
  "sep_token_id": null,
  "decoder_start_token_id": null
}