{ "add_frame_cond": null, "batch_size": 4, "cond_mask_prob": 0.2, "cuda": true, "data_format": "face", "data_root": "./dataset/PXB184/", "dataset": "social", "device": 0, "diffusion_steps": 10, "heads": 8, "lambda_vel": 0.0, "latent_dim": 512, "layers": 8, "log_interval": 1000, "lr": 0.0001, "lr_anneal_steps": 0, "max_seq_length": 600, "noise_schedule": "cosine", "not_rotary": false, "num_audio_layers": 3, "num_steps": 800000, "overwrite": false, "resume_checkpoint": "", "save_dir": "checkpoints/diffusion/c1_face/", "save_interval": 5000, "seed": 10, "sigma_small": true, "simplify_audio": false, "timestep_respacing": "", "train_platform_type": "NoPlatform", "unconstrained": false, "weight_decay": 0.0 }