10k_hybrid / config.json
Spring14th's picture
Upload config.json with huggingface_hub
8d12888 verified
{
"type": "hybrid_act_diffusion",
"n_obs_steps": 1,
"normalization_mapping": {
"VISUAL": "MEAN_STD",
"STATE": "MEAN_STD",
"ACTION": "MEAN_STD"
},
"input_features": {
"observation.images.top": {
"type": "VISUAL",
"shape": [
3,
480,
640
]
},
"observation.state": {
"type": "STATE",
"shape": [
14
]
}
},
"output_features": {
"action": {
"type": "ACTION",
"shape": [
14
]
}
},
"device": "cuda",
"use_amp": false,
"chunk_size": 100,
"n_action_steps": 100,
"temporal_ensemble_coeff": null,
"vision_backbone": "resnet18",
"pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
"replace_final_stride_with_dilation": false,
"use_group_norm": true,
"pre_norm": false,
"dim_model": 512,
"n_heads": 8,
"dim_feedforward": 3200,
"feedforward_activation": "relu",
"n_encoder_layers": 4,
"n_decoder_layers": 1,
"use_vae": true,
"latent_dim": 32,
"n_vae_encoder_layers": 4,
"noise_scheduler_type": "DDPM",
"num_train_timesteps": 50,
"beta_schedule": "squaredcos_cap_v2",
"beta_start": 0.0001,
"beta_end": 0.02,
"prediction_type": "epsilon",
"clip_sample": true,
"clip_sample_range": 1.0,
"diffusion_refine_steps": 3,
"spatial_softmax_num_keypoints": 32,
"diffusion_down_dims": [
256,
512
],
"diffusion_kernel_size": 3,
"diffusion_n_groups": 8,
"diffusion_step_embed_dim": 64,
"diffusion_use_film_scale_modulation": true,
"hybrid_mode": "sequential",
"hybrid_weight": 0.5,
"keyframe_indices": [
0,
49,
99
],
"diffusion_loss_weight": 1.0,
"dropout": 0.1,
"kl_weight": 1.0,
"smoothness_weight": 0.0,
"smoothness_type": "velocity",
"optimizer_lr": 1e-05,
"optimizer_weight_decay": 0.0001,
"optimizer_lr_backbone": 1e-05,
"optimizer_lr_diffusion": 0.0001,
"scheduler_name": "cosine",
"scheduler_num_warmup_steps": 500,
"scheduler_num_training_steps": 30000
}