# Training settings.
# NOTE(review): rebuilt from table-cell residue (`| | ... |`) that had dropped
# all indentation. Only `strategy` and `offset` are placed under
# `initial_frame`; every other key is assumed to sit directly under
# `training` - confirm against the code that reads this file.
training:
  batch_size: 24
  max_steps: 50000

  max_history_frames: 3
  bidirectional_training: true

  initial_teacher_forcing_prob: 0.5
  initial_frame:
    strategy: "middle"
    offset: 0
  random_history_sampling: true
  freeze_static_from_anchor: true
  multi_sample_attempts: 5
  decoder_noise_std: 0.2
  frame_rate: 8.0

  debug_print_interval: 1
  log_interval: 50

  learning_rate: 0.001
  gradient_clip_val: 1.0

  use_multi_gpu: true
  gpu_list: [0, 1, 2, 3, 4, 5, 6, 7]
  use_free_gpus: true

# Evaluation settings.
# NOTE(review): the mangled source does not show whether `evaluation` is a
# top-level section or a child of `training`; it is written at the column of
# its original row - confirm the expected parent.
evaluation:
  # NOTE(review): confirm how the consumer interprets a value of 0 here.
  max_batches: 0

# Settings for saving generated output.
# NOTE(review): the mangled source does not show whether `save_generation` is
# a top-level section or a child of `training`; it is written at the column
# of its original row - confirm the expected parent.
save_generation:
  enabled: true
  save_gt: true
  fixed_samples: 5
  save_interval: 100
  save_dir: "core_space"

# text2wave model selection.
text2wave_model:
  model_name: "google/t5-v1_1-small"

# Loss configuration: per-term weights and the contrastive-term setting.
# NOTE(review): indentation was lost in the source; `weights` and
# `wave_contrastive` are assumed to be siblings under `loss` - confirm.
loss:
  weights:
    wave_loss: 4.0
    wave_contrastive_loss: 2.0
    world_info_loss: 0.5
    controllable_info_loss: 0.1
    pla_loss: 3.0

  wave_contrastive:
    temperature: 0.2

# Data loading settings.
data:
  num_workers: 32
  max_sequences: 100

# Physics constants.
# NOTE(review): the mangled source does not show whether `physics` is a
# top-level section or a child of the preceding `data` section; it is written
# at the column of its original row - confirm the expected parent.
physics:
  gravity: 9.81
  collision_buffer: 1.05

# Grid settings.
grid:
  size: 64
  prob_threshold: 0.5

# World coordinate system settings.
world_coordinate_system:
  enabled: true
  world_scale: 10.0
  voxel_size: 0.05
  near_plane: 0.1
  far_plane: 50.0
  predict_world_scale: true
  world_scale_loss_weight: 0.1

# Camera settings. `default_view` names one of the keys under `views`;
# `view_rotations` carries one entry for each of the same six view names.
camera:
  default_view: "front"
  fov: 60
  near: 0.1
  far: 100.0

  # One three-element list per named view.
  views:
    front: [0, 0, 2]
    back: [0, 0, -2]
    left: [-2, 0, 0]
    right: [2, 0, 0]
    top: [0, 2, 0]
    bottom: [0, -2, 0]

  # NOTE(review): the magnitudes (3.14159, 1.5708) look like radians - confirm
  # the unit and axis order against the consumer.
  view_rotations:
    front: [0, 0, 0]
    back: [0, 3.14159, 0]
    left: [0, -1.5708, 0]
    right: [0, 1.5708, 0]
    top: [-1.5708, 0, 0]
    bottom: [1.5708, 0, 0]

# Generation settings.
# NOTE(review): indentation was lost in the source. `start`, `end` and `fps`
# are placed under `time`; `timestep` and the keys after it are assumed to
# sit directly under `generation` - confirm against the consumer.
generation:
  mode: "image"
  time:
    start: 0.0
    end: 12.0
    fps: 30
  timestep: 0.0
  compute_wsf: false
  output_dir: "core_space"

# Output format settings.
output:
  format: "triple_channel"
  third_channel: "amplitude"

# Model component settings.
model:
  wave_encoder:
    hidden_dim: 256
    dropout: 0.1

  feature_extractor:
    input_dim: 4
    hidden_dim: 64
    output_dim: 32
    dropout: 0.1

# Pixel network settings.
# NOTE(review): the mangled source does not show whether `pixel_net` is a
# top-level section or a child of the preceding `model` section; it is
# written at the column of its original row - confirm the expected parent.
pixel_net:
  channels: [32, 64, 128, 64, 4]
  kernel_size: 3
  padding: 1
  dropout: 0.1