{
  "auto_map": {
    "AutoConfig": "configuration_finelap.FineLAPConfig",
    "AutoModel": "modeling_finelap.FineLAPModel"
  },
  "architectures": [
    "FineLAPModel"
  ],
  "audio_config": {
    "_attn_implementation_autoset": true,
    "activation_dropout": 0.0,
    "attn_drop_rate": 0.0,
    "depth": 12,
    "drop_rate": 0.0,
    "embed_dim": 768,
    "end_drop_path_rate": 0.0,
    "fixed_positions": true,
    "img_size": [
      1024,
      128
    ],
    "in_chans": 1,
    "layer_norm_first": false,
    "mel_bins": 128,
    "mlp_ratio": 4.0,
    "model_type": "eat",
    "model_variant": "pretrain",
    "norm_affine": true,
    "norm_eps": 1e-06,
    "num_classes": 527,
    "num_heads": 12,
    "patch_size": 16,
    "post_mlp_drop": 0.0,
    "qkv_bias": true,
    "start_drop_path_rate": 0.0,
    "stride": 16
  },
  "b_global": -10.0,
  "b_local": -10.0,
  "embed_size": 1024,
  "local_audio_proj_type": "transformer",
  "model_type": "finelap",
  "normalize_dense_audio_embeds": true,
  "temp_global": 0.1,
  "temp_local": 0.1,
  "text_encoder_name": "roberta-base",
  "torch_dtype": "float32",
  "transformers_version": "4.51.3",
  "unify_audio_proj": false
}