# @package _global_
#
# Hydra settings for a multi-node sweep: how the per-job output directory is
# named, where sweep output is written, and what resources the launcher requests.
# Requires the USER and PREFIX environment variables to be set, since both are
# read through ${env:...} interpolations below.

hydra:
  job:
    config:
      # Controls how command-line overrides are rendered into
      # ${hydra.job.override_dirname}, which is used in sweep.dir below.
      override_dirname:
        kv_sep: ':'     # separator between an override's key and its value
        item_sep: '/'   # separator between overrides; '/' yields one path level per override
        # Overrides listed here are left out of the generated directory name.
        exclude_keys:
          - run_config
          - distributed_training.distributed_port
          - distributed_training.distributed_world_size
          - model.pretrained_model_path
          - model.target_network_path
          - next_script
          - task.cache_in_scratch
          - task.data
          - checkpoint.save_interval_updates
          - checkpoint.keep_interval_updates
          - checkpoint.save_on_overflow
  sweep:
    # Layout: /checkpoint/<USER>/<PREFIX>/<config name>_<gpus per node>/<override dirname>
    dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
    # Empty subdir: jobs write directly into sweep.dir rather than a per-job subfolder.
    subdir: ''
  launcher:
    # Launcher bookkeeping files are kept alongside the sweep output.
    submitit_folder: ${hydra.sweep.dir}
    timeout_min: 4320        # 4320 minutes = 72 hours
    cpus_per_task: 10
    gpus_per_node: 8
    tasks_per_node: 8        # equals gpus_per_node, i.e. one task per GPU
    mem_gb: 450
    nodes: 2                 # 2 nodes x 8 GPUs = 16 GPUs in total
    name: ${env:PREFIX}_${hydra.job.config_name}
    # NOTE(review): partition and constraint values are cluster-specific;
    # confirm they exist on the target cluster before reusing this file.
    partition: devlab,learnlab,learnfair,scavenge
    constraint: volta32gb,ib4
    # Presumably the maximum number of requeues after a timeout - confirm
    # against the launcher plugin's documentation.
    max_num_timeout: 30