| { |
| "_name_or_path": null, |
| "architectures": [ |
| "SMAForSSL" |
| ], |
| "attention_dropout_prob": 0.0, |
| "cross_attention_widening_factor": 1, |
| "cross_eval_noising_args": null, |
| "cross_train_noising_args": [ |
| [ |
| "RandomlySelectedCrossAttentionMasking", |
| { |
| "exclude_seen_reconstruction": true, |
| "masking_ratio": 0.15, |
| "num_per_query": 4, |
| "varying_length": true |
| } |
| ] |
| ], |
| "decoder_attention_channels": 512, |
| "decoder_heads": 8, |
| "decoder_latent_channels": 512, |
| "decoder_type": "cross_attention", |
| "dense_use_bias": true, |
| "drop_path_rate": 0.0, |
| "embedded_channels": 512, |
| "encoder_cross_attention_channels": 256, |
| "encoder_type": "cross_attention", |
| "final_project": true, |
| "hidden_act": "gelu", |
| "hidden_dropout_prob": 0.0, |
| "initializer_range": 0.02, |
| "input_channels": 3, |
| "input_type": "discrete", |
| "latent_channels": 1024, |
| "layer_norm_eps": 1e-12, |
| "layernorm_eps": 1e-12, |
| "loss_fn": "mse", |
| "max_position_embeddings": 1024, |
| "model_type": "sma", |
| "num_blocks": 1, |
| "num_cross_attention_heads": 8, |
| "num_discrete_tokens": 262, |
| "num_latents": 256, |
| "num_outputs": 1024, |
| "num_self_attends_per_block": 16, |
| "num_self_attention_heads": 8, |
| "output_channels": 262, |
| "pe_initializer_range": 0.02, |
| "post_decoder_layers": null, |
| "project_after_concat": true, |
| "qk_channels": 256, |
| "self_attention_widening_factor": 1, |
| "share_decoder_queries": true, |
| "share_embedding_weights": true, |
| "teacher_args": { |
| "auxiliary_loss_fn": "mse", |
| "auxiliary_loss_weight": 1.0, |
| "ema_args": { |
| "ema_decay_end": 0.0, |
| "ema_decay_start": 0.0 |
| }, |
| "eval_transform_args": null, |
| "mask_replace": 3, |
| "num_layer_target_avg": null, |
| "reconstruction_decoder_args": { |
| "num_heads": 1, |
| "num_outputs": 1024, |
| "output_channels": 262, |
| "qk_channels": 256, |
| "query_num_channels": 512, |
| "share_decoder_queries": true, |
| "share_embedding_weights": true, |
| "use_query_residual": true, |
| "v_channels": 512 |
| }, |
| "reconstruction_loss_fn": "crossentropy", |
| "reconstruction_loss_weight": 1.0, |
| "reconstruction_weighted_loss": false, |
| "target_normalization_fn": "layernorm", |
| "train_transform_args": null |
| }, |
| "teacher_name": "ReconstructionTeacher", |
| "torch_dtype": "float32", |
| "transformers_version": "4.26.0.dev0", |
| "use_decoder": false, |
| "use_position_embeddings": true, |
| "use_query_residual": true, |
| "v_channels": 1024 |
| } |
|
|