{
  "meta": {
    "model_name": "pose-centric-multimodal-har",
    "version": "v1.0",
    "description": "Pose-centric multimodal temporal HAR model"
  },
  "pose_model": {
    "backbone": "PoseFormerFactorized",
    "joints": 17,
    "in_channels": 3,
    "hidden_dim": 128,
    "layers": 4,
    "heads": 8,
    "max_seq_len": 30,
    "use_sinusoidal_pe": false,
    "enable_temporal_attention": true,
    "return_tokens": true
  },
  "long_term_model": {
    "enabled": true,
    "type": "transformer_encoder",
    "layers": 2,
    "heads": 8,
    "dropout": 0.1
  },
  "image_model": {
    "enabled": true,
    "backbone": "resnet18",
    "feature_dim": 128,
    "pretrained": true
  },
  "fusion_model": {
    "enabled": true,
    "mode": "concat",
    "output_dim": 128
  },
  "classifier": {
    "num_classes": 500
  },
  "regularization": {
    "short_term_dropout": 0.3,
    "long_term_dropout": 0.1
  },
  "precision": "fp32"
}