{ "meta": { "model_name": "pose-centric-multimodal-har", "version": "v1.0", "description": "Pose-centric multimodal temporal HAR model" }, "pose_model": { "backbone": "PoseFormerFactorized", "joints": 17, "in_channels": 3, "hidden_dim": 128, "layers": 4, "heads": 8, "max_seq_len": 30, "use_sinusoidal_pe": false, "enable_temporal_attention": true, "return_tokens": true }, "long_term_model": { "enabled": true, "type": "transformer_encoder", "layers": 2, "heads": 8, "dropout": 0.1 }, "image_model": { "enabled": true, "backbone": "resnet18", "feature_dim": 128, "pretrained": true }, "fusion_model": { "enabled": true, "mode": "concat", "output_dim": 128 }, "classifier": { "num_classes": 500 }, "regularization": { "short_term_dropout": 0.3, "long_term_dropout": 0.1 }, "precision": "fp32" }