diff --git "a/training_log.txt" "b/training_log.txt" new file mode 100644--- /dev/null +++ "b/training_log.txt" @@ -0,0 +1,29306 @@ +The following values were not passed to `accelerate launch` and had defaults used instead: + `--mixed_precision` was set to a value of `'no'` + `--dynamo_backend` was set to a value of `'no'` +To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`. +Rank[3/8] 06/19/2025 14:10:14 INFO train.py:239 | if accelerator initialized:True +Rank[3/8] 06/19/2025 14:10:14 INFO train.py:240 | accelerator state: {'_cpu': False, 'backend': 'nccl', 'device': device(type='cuda', index=3), 'debug': False, 'distributed_type': , 'num_processes': 8, 'process_index': 3, 'local_process_index': 3, 'fork_launched': False, 'deepspeed_plugins': None, 'use_ipex': None, 'torch_tp_plugin': None, 'dynamo_plugin': TorchDynamoPlugin(backend=, mode='default', fullgraph=False, dynamic=False, options=None, disable=False), '_mixed_precision': 'no'} +Rank[0/8] 06/19/2025 14:10:14 INFO train.py:239 | if accelerator initialized:True +Rank[2/8] 06/19/2025 14:10:14 INFO train.py:239 | if accelerator initialized:True +Rank[0/8] 06/19/2025 14:10:14 INFO train.py:240 | accelerator state: {'_cpu': False, 'backend': 'nccl', 'device': device(type='cuda', index=0), 'debug': False, 'distributed_type': , 'num_processes': 8, 'process_index': 0, 'local_process_index': 0, 'fork_launched': False, 'deepspeed_plugins': None, 'use_ipex': None, 'torch_tp_plugin': None, 'dynamo_plugin': TorchDynamoPlugin(backend=, mode='default', fullgraph=False, dynamic=False, options=None, disable=False), '_mixed_precision': 'no'} +Rank[2/8] 06/19/2025 14:10:14 INFO train.py:240 | accelerator state: {'_cpu': False, 'backend': 'nccl', 'device': device(type='cuda', index=2), 'debug': False, 'distributed_type': , 'num_processes': 8, 'process_index': 2, 'local_process_index': 2, 'fork_launched': False, 'deepspeed_plugins': None, 'use_ipex': None, 'torch_tp_plugin': None, 'dynamo_plugin': TorchDynamoPlugin(backend=, mode='default', fullgraph=False, dynamic=False, options=None, disable=False), '_mixed_precision': 'no'} +Rank[6/8] 06/19/2025 14:10:14 INFO train.py:239 | if accelerator initialized:True +Rank[4/8] 06/19/2025 14:10:14 INFO train.py:239 | if accelerator initialized:True +Rank[5/8] 06/19/2025 14:10:14 INFO train.py:239 | if accelerator initialized:True +Rank[1/8] 06/19/2025 14:10:14 INFO train.py:239 | if accelerator initialized:True +Rank[7/8] 06/19/2025 14:10:14 INFO train.py:239 | if accelerator initialized:True +Rank[4/8] 06/19/2025 14:10:14 INFO train.py:240 | accelerator state: {'_cpu': False, 'backend': 'nccl', 'device': device(type='cuda', index=4), 'debug': False, 'distributed_type': , 'num_processes': 8, 'process_index': 4, 'local_process_index': 4, 'fork_launched': False, 'deepspeed_plugins': None, 'use_ipex': None, 'torch_tp_plugin': None, 'dynamo_plugin': TorchDynamoPlugin(backend=, mode='default', fullgraph=False, dynamic=False, options=None, disable=False), '_mixed_precision': 'no'} +Rank[6/8] 06/19/2025 14:10:14 INFO train.py:240 | accelerator state: {'_cpu': False, 'backend': 'nccl', 'device': device(type='cuda', index=6), 'debug': False, 'distributed_type': , 'num_processes': 8, 'process_index': 6, 'local_process_index': 6, 'fork_launched': False, 'deepspeed_plugins': None, 'use_ipex': None, 'torch_tp_plugin': None, 'dynamo_plugin': TorchDynamoPlugin(backend=, mode='default', fullgraph=False, dynamic=False, options=None, disable=False), '_mixed_precision': 'no'} +Rank[5/8] 06/19/2025 14:10:14 INFO train.py:240 | accelerator state: {'_cpu': False, 'backend': 'nccl', 'device': device(type='cuda', index=5), 'debug': False, 'distributed_type': , 'num_processes': 8, 'process_index': 5, 'local_process_index': 5, 'fork_launched': False, 'deepspeed_plugins': None, 'use_ipex': None, 'torch_tp_plugin': None, 'dynamo_plugin': TorchDynamoPlugin(backend=, mode='default', fullgraph=False, dynamic=False, options=None, disable=False), '_mixed_precision': 'no'} +Rank[1/8] 06/19/2025 14:10:14 INFO train.py:240 | accelerator state: {'_cpu': False, 'backend': 'nccl', 'device': device(type='cuda', index=1), 'debug': False, 'distributed_type': , 'num_processes': 8, 'process_index': 1, 'local_process_index': 1, 'fork_launched': False, 'deepspeed_plugins': None, 'use_ipex': None, 'torch_tp_plugin': None, 'dynamo_plugin': TorchDynamoPlugin(backend=, mode='default', fullgraph=False, dynamic=False, options=None, disable=False), '_mixed_precision': 'no'} +Rank[7/8] 06/19/2025 14:10:14 INFO train.py:240 | accelerator state: {'_cpu': False, 'backend': 'nccl', 'device': device(type='cuda', index=7), 'debug': False, 'distributed_type': , 'num_processes': 8, 'process_index': 7, 'local_process_index': 7, 'fork_launched': False, 'deepspeed_plugins': None, 'use_ipex': None, 'torch_tp_plugin': None, 'dynamo_plugin': TorchDynamoPlugin(backend=, mode='default', fullgraph=False, dynamic=False, options=None, disable=False), '_mixed_precision': 'no'} +Rank[0/8] 06/19/2025 14:10:14 INFO train.py:135 | +{ + "mode": "det", + "data_version": "v1", + "trainval": false, + "embed_dims": 256, + "with_depth": true, + "with_depth_loss": true, + "min_depth": 0.25, + "max_depth": 10, + "num_depth": 64, + "batch_size": 1, + "val_batch_size": 1, + "step_log_freq": 25, + "num_workers": 8, + "lr": 0.0002, + "eval_only": false, + "checkpoint": "./ckpt/groundingdino_swint_ogc_mmdet-822d7e9d-rename.pth", + "bert_checkpoint": "./ckpt/bert-base-uncased", + "anchor_file": "./anchor_files/embodiedscan_kmeans_det_cam_log_z-0.2-3.npy", + "data_root": "./data/", + "max_epoch": 240, + "epoch_eval_freq": 10, + "save_epoch_freq": 10 +} +/usr/local/lib/python3.10/dist-packages/torch/utils/cpp_extension.py:1965: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. +If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST']. + warnings.warn( +/usr/local/lib/python3.10/dist-packages/torch/utils/cpp_extension.py:1965: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. +If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST']. + warnings.warn( +/usr/local/lib/python3.10/dist-packages/torch/utils/cpp_extension.py:1965: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. +If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST']. + warnings.warn( + Loading Train dataset: 0%| | 0/3113 [00:00