{ "_name_or_path": "./", "architectures": [ "VideoMAEv2ForVideoClassification" ], "auto_map": { "AutoConfig": "OpenGVLab/VideoMAEv2-Base--modeling_config.VideoMAEv2Config", "AutoModel": "OpenGVLab/VideoMAEv2-Base--modeling_videomaev2.VideoMAEv2" }, "hidden_size": 768, "model_config": { "attn_drop_rate": 0.0, "cos_attn": false, "depth": 12, "drop_path_rate": 0.0, "drop_rate": 0.0, "embed_dim": 768, "img_size": 224, "in_chans": 3, "init_values": 0.0, "layer_norm_eps": 1e-06, "mlp_ratio": 4, "norm_layer": "nn.LayerNorm", "num_classes": 0, "num_frames": 16, "num_heads": 12, "patch_size": 16, "qk_scale": null, "qkv_bias": true, "tubelet_size": 2, "use_learnable_pos_emb": false, "use_mean_pooling": true, "with_cp": false }, "model_type": "VideoMAEv2_Base", "problem_type": "single_label_classification", "torch_dtype": "float32", "transformers_version": "4.49.0", "use_cache": true }