happyhappy-jun commited on Aug 11, 2025

Commit

c210ee6

verified ·

1 Parent(s): 7e7518a

Delete files wandb/ with huggingface_hub

Browse files

Files changed (47) hide show

wandb/debug-internal.log +0 -12
wandb/debug.log +0 -28
wandb/run-20250808_235938-1zaivgpd/files/config.yaml +0 -225
wandb/run-20250808_235938-1zaivgpd/files/output.log +0 -15
wandb/run-20250808_235938-1zaivgpd/files/requirements.txt +0 -154
wandb/run-20250808_235938-1zaivgpd/files/wandb-metadata.json +0 -149
wandb/run-20250808_235938-1zaivgpd/files/wandb-summary.json +0 -1
wandb/run-20250808_235938-1zaivgpd/logs/debug-core.log +0 -15
wandb/run-20250808_235938-1zaivgpd/logs/debug-internal.log +0 -11
wandb/run-20250808_235938-1zaivgpd/logs/debug.log +0 -22
wandb/run-20250808_235938-1zaivgpd/run-1zaivgpd.wandb +0 -3
wandb/run-20250809_000914-ttc1ybny/files/config.yaml +0 -225
wandb/run-20250809_000914-ttc1ybny/files/output.log +0 -77
wandb/run-20250809_000914-ttc1ybny/files/requirements.txt +0 -154
wandb/run-20250809_000914-ttc1ybny/files/wandb-metadata.json +0 -149
wandb/run-20250809_000914-ttc1ybny/files/wandb-summary.json +0 -1
wandb/run-20250809_000914-ttc1ybny/logs/debug-core.log +0 -14
wandb/run-20250809_000914-ttc1ybny/logs/debug-internal.log +0 -11
wandb/run-20250809_000914-ttc1ybny/logs/debug.log +0 -22
wandb/run-20250809_000914-ttc1ybny/run-ttc1ybny.wandb +0 -3
wandb/run-20250809_002817-g4nrjez0/files/config.yaml +0 -225
wandb/run-20250809_002817-g4nrjez0/files/output.log +0 -15
wandb/run-20250809_002817-g4nrjez0/files/requirements.txt +0 -154
wandb/run-20250809_002817-g4nrjez0/files/wandb-metadata.json +0 -149
wandb/run-20250809_002817-g4nrjez0/files/wandb-summary.json +0 -1
wandb/run-20250809_002817-g4nrjez0/logs/debug-core.log +0 -14
wandb/run-20250809_002817-g4nrjez0/logs/debug-internal.log +0 -11
wandb/run-20250809_002817-g4nrjez0/logs/debug.log +0 -22
wandb/run-20250809_002817-g4nrjez0/run-g4nrjez0.wandb +0 -3
wandb/run-20250809_004353-jppa1ary/files/config.yaml +0 -205
wandb/run-20250809_004353-jppa1ary/files/output.log +0 -4
wandb/run-20250809_004353-jppa1ary/files/requirements.txt +0 -154
wandb/run-20250809_004353-jppa1ary/files/wandb-metadata.json +0 -128
wandb/run-20250809_004353-jppa1ary/files/wandb-summary.json +0 -1
wandb/run-20250809_004353-jppa1ary/logs/debug-core.log +0 -16
wandb/run-20250809_004353-jppa1ary/logs/debug-internal.log +0 -12
wandb/run-20250809_004353-jppa1ary/logs/debug.log +0 -28
wandb/run-20250809_004353-jppa1ary/run-jppa1ary.wandb +0 -3
wandb/run-20250809_074602-gpyuprau/files/config.yaml +0 -205
wandb/run-20250809_074602-gpyuprau/files/output.log +0 -4
wandb/run-20250809_074602-gpyuprau/files/requirements.txt +0 -154
wandb/run-20250809_074602-gpyuprau/files/wandb-metadata.json +0 -128
wandb/run-20250809_074602-gpyuprau/files/wandb-summary.json +0 -1
wandb/run-20250809_074602-gpyuprau/logs/debug-core.log +0 -16
wandb/run-20250809_074602-gpyuprau/logs/debug-internal.log +0 -12
wandb/run-20250809_074602-gpyuprau/logs/debug.log +0 -28
wandb/run-20250809_074602-gpyuprau/run-gpyuprau.wandb +0 -3

wandb/debug-internal.log DELETED Viewed

@@ -1,12 +0,0 @@
-{"time":"2025-08-09T00:43:53.899512181Z","level":"INFO","msg":"stream: starting","core version":"0.21.1"}
-{"time":"2025-08-09T00:43:54.352151834Z","level":"INFO","msg":"stream: created new stream","id":"jppa1ary"}
-{"time":"2025-08-09T00:43:54.352229223Z","level":"INFO","msg":"stream: started","id":"jppa1ary"}
-{"time":"2025-08-09T00:43:54.352242749Z","level":"INFO","msg":"handler: started","stream_id":"jppa1ary"}
-{"time":"2025-08-09T00:43:54.352276271Z","level":"INFO","msg":"writer: started","stream_id":"jppa1ary"}
-{"time":"2025-08-09T00:43:54.352258529Z","level":"INFO","msg":"sender: started","stream_id":"jppa1ary"}
-{"time":"2025-08-09T07:37:36.250440803Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-08-09T07:37:36.399127669Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading history steps 10395-10395, summary, console lines 1-3","runtime_seconds":0.146154819}],"total_operations":1}}
-{"time":"2025-08-09T07:37:36.508572754Z","level":"INFO","msg":"stream: closing","id":"jppa1ary"}
-{"time":"2025-08-09T07:37:36.508598422Z","level":"INFO","msg":"handler: closed","stream_id":"jppa1ary"}
-{"time":"2025-08-09T07:37:36.509838915Z","level":"INFO","msg":"sender: closed","stream_id":"jppa1ary"}
-{"time":"2025-08-09T07:37:36.509849291Z","level":"INFO","msg":"stream: closed","id":"jppa1ary"}

wandb/debug.log DELETED Viewed

@@ -1,28 +0,0 @@
-2025-08-09 00:43:53,600 INFO    MainThread:332325 [wandb_setup.py:_flush():80] Current SDK version is 0.21.1
-2025-08-09 00:43:53,601 INFO    MainThread:332325 [wandb_setup.py:_flush():80] Configure stats pid to 332325
-2025-08-09 00:43:53,601 INFO    MainThread:332325 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/.config/wandb/settings
-2025-08-09 00:43:53,601 INFO    MainThread:332325 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/openvla-mini/wandb/settings
-2025-08-09 00:43:53,601 INFO    MainThread:332325 [wandb_setup.py:_flush():80] Loading settings from environment variables
-2025-08-09 00:43:53,601 INFO    MainThread:332325 [wandb_init.py:setup_run_log_directory():703] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7/wandb/run-20250809_004353-jppa1ary/logs/debug.log
-2025-08-09 00:43:53,602 INFO    MainThread:332325 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7/wandb/run-20250809_004353-jppa1ary/logs/debug-internal.log
-2025-08-09 00:43:53,602 INFO    MainThread:332325 [wandb_init.py:init():830] calling init triggers
-2025-08-09 00:43:53,602 INFO    MainThread:332325 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
-config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+7b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+7b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-7b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'image_sequence_len': 1, 'align_epochs': 1, 'align_max_steps': None, 'align_save_every_n_steps': 10000, 'align_global_batch_size': 96, 'align_per_device_batch_size': 16, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_save_every_n_steps': 10000, 'finetune_global_batch_size': 128, 'finetune_per_device_batch_size': 4, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data2'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'prismatic', 'wandb_entity': None, '_wandb': {}}
-2025-08-09 00:43:53,602 INFO    MainThread:332325 [wandb_init.py:init():871] starting backend
-2025-08-09 00:43:53,878 INFO    MainThread:332325 [wandb_init.py:init():874] sending inform_init request
-2025-08-09 00:43:53,896 INFO    MainThread:332325 [wandb_init.py:init():882] backend started and connected
-2025-08-09 00:43:53,901 INFO    MainThread:332325 [wandb_init.py:init():953] updated telemetry
-2025-08-09 00:43:53,957 INFO    MainThread:332325 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
-2025-08-09 00:43:54,674 INFO    MainThread:332325 [wandb_init.py:init():1029] starting run threads in backend
-2025-08-09 00:43:55,488 INFO    MainThread:332325 [wandb_run.py:_console_start():2494] atexit reg
-2025-08-09 00:43:55,488 INFO    MainThread:332325 [wandb_run.py:_redirect():2342] redirect: wrap_raw
-2025-08-09 00:43:55,488 INFO    MainThread:332325 [wandb_run.py:_redirect():2411] Wrapping output streams.
-2025-08-09 00:43:55,488 INFO    MainThread:332325 [wandb_run.py:_redirect():2434] Redirects installed.
-2025-08-09 00:43:55,504 INFO    MainThread:332325 [wandb_init.py:init():1075] run started, returning control to user process
-2025-08-09 07:37:35,382 INFO    MainThread:332325 [wandb_run.py:_finish():2260] finishing run happyhappy/prismatic/jppa1ary
-2025-08-09 07:37:35,385 INFO    MainThread:332325 [wandb_run.py:_atexit_cleanup():2459] got exitcode: 0
-2025-08-09 07:37:35,387 INFO    MainThread:332325 [wandb_run.py:_restore():2441] restore
-2025-08-09 07:37:35,387 INFO    MainThread:332325 [wandb_run.py:_restore():2447] restore done
-2025-08-09 07:37:36,501 INFO    MainThread:332325 [wandb_run.py:_footer_history_summary_info():3895] rendering history
-2025-08-09 07:37:36,502 INFO    MainThread:332325 [wandb_run.py:_footer_history_summary_info():3927] rendering summary
-2025-08-09 07:37:36,502 INFO    MainThread:332325 [wandb_run.py:_footer_sync_info():3856] logging synced files

wandb/run-20250808_235938-1zaivgpd/files/config.yaml DELETED Viewed

@@ -1,225 +0,0 @@
-_wandb:
-    value:
-        cli_version: 0.21.1
-        e:
-            dlllufpnqzysennzlwkr1rfukzvy018p:
-                args:
-                    - --model.type
-                    - prism-qwen25-extra-dinosiglip-224px+3b
-                    - --model.finetune_per_device_batch_size
-                    - "4"
-                codePath: scripts/pretrain.py
-                codePathLocal: scripts/pretrain.py
-                cpu_count: 96
-                cpu_count_logical: 192
-                cudaVersion: "12.8"
-                disk:
-                    /:
-                        total: "520120602624"
-                        used: "64344903680"
-                email: bjyoon513@gmail.com
-                executable: /fsx/byungjun/miniconda3/envs/minivla/bin/python3.10
-                git:
-                    commit: 0822b36227b5a771be4eb2680e34c559734c8fdc
-                    remote: https://github.com/happyhappy-jun/openvla-mini
-                gpu: NVIDIA H200
-                gpu_count: 8
-                gpu_nvidia:
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-95044091-c6a6-4e9d-26a3-0249feeaf796
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-e54a8b43-5dd9-a2f8-8a71-b254a12248ec
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-daed9c7c-6f35-ec0c-abd5-49e1f7d48645
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-acf2a7ee-d8a1-bb8c-be49-c7a07c0f07da
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-0245a021-19ca-991a-61b0-94cbc116d182
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-4213a83d-27d3-97d3-0cec-f9700637d48c
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-8be9f5c6-a214-8b33-0ac2-217892edfa6f
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-5c5fce07-faf7-1345-d5ea-4c13e75769e7
-                host: compute-st-kait-gpu-2
-                memory:
-                    total: "2147425312768"
-                os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.35
-                program: /fsx/byungjun/openvla-mini/scripts/pretrain.py
-                python: CPython 3.10.18
-                root: runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7
-                slurm:
-                    cluster_name: kait-gpu-06-parallelcluster
-                    conf: /opt/slurm/etc/slurm.conf
-                    cpu_bind: quiet,mask_cpu:0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000
-                    cpu_bind_list: 0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000
-                    cpu_bind_type: 'mask_cpu:'
-                    cpu_bind_verbose: quiet
-                    cpus_on_node: "128"
-                    gpus: "8"
-                    gpus_on_node: "8"
-                    gtids: "0"
-                    job_cpus_per_node: "128"
-                    job_end_time: "1786233065"
-                    job_gid: "1004"
-                    job_group: byungjun
-                    job_id: "527"
-                    job_name: bash
-                    job_nodelist: compute-st-kait-gpu-2
-                    job_num_nodes: "1"
-                    job_partition: batch2
-                    job_start_time: "1754697065"
-                    job_uid: "1004"
-                    job_user: byungjun
-                    jobid: "527"
-                    launch_node_ipaddr: 10.10.47.245
-                    localid: "0"
-                    nnodes: "1"
-                    nodeid: "0"
-                    nodelist: compute-st-kait-gpu-2
-                    nprocs: "1"
-                    ntasks: "1"
-                    prio_process: "0"
-                    procid: "0"
-                    pty_port: "36537"
-                    pty_win_col: "362"
-                    pty_win_row: "84"
-                    srun_comm_host: 10.10.47.245
-                    srun_comm_port: "45601"
-                    step_gpus: 0,1,2,3,4,5,6,7
-                    step_id: "0"
-                    step_launcher_port: "45601"
-                    step_nodelist: compute-st-kait-gpu-2
-                    step_num_nodes: "1"
-                    step_num_tasks: "1"
-                    step_tasks_per_node: "1"
-                    stepid: "0"
-                    submit_dir: /fsx/byungjun/openvla-mini
-                    submit_host: ip-10-10-47-245
-                    task_pid: "299864"
-                    tasks_per_node: "1"
-                    topology_addr: compute-st-kait-gpu-2
-                    topology_addr_pattern: node
-                    umask: "0002"
-                startedAt: "2025-08-08T23:59:38.493368Z"
-                writerId: dlllufpnqzysennzlwkr1rfukzvy018p
-        m: []
-        python_version: 3.10.18
-        t:
-            "1":
-                - 1
-                - 2
-                - 3
-                - 11
-                - 41
-                - 49
-                - 63
-                - 71
-            "2":
-                - 1
-                - 2
-                - 3
-                - 11
-                - 41
-                - 49
-                - 63
-                - 71
-            "3":
-                - 13
-                - 16
-                - 61
-            "4": 3.10.18
-            "5": 0.21.1
-            "6": 4.40.1
-            "12": 0.21.1
-            "13": linux-x86_64
-dataset:
-    value:
-        align_stage_components:
-            - download/llava-laion-cc-sbu-558k/chat.json
-            - download/llava-laion-cc-sbu-558k
-        dataset_id: llava-v15
-        dataset_root_dir: data2
-        finetune_stage_components:
-            - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
-            - download/llava-v1.5-instruct
-        type: llava-v15
-hf_token:
-    value: .hf_token
-model:
-    value:
-        align_epochs: 1
-        align_global_batch_size: 96
-        align_learning_rate: 0.001
-        align_lr_scheduler_type: linear-warmup+cosine-decay
-        align_max_grad_norm: 1
-        align_max_steps: null
-        align_per_device_batch_size: 16
-        align_save_every_n_steps: 10000
-        align_train_strategy: fsdp-shard-grad-op
-        align_warmup_ratio: 0.03
-        align_weight_decay: 0
-        arch_specifier: no-align+fused-gelu-mlp
-        enable_gradient_checkpointing: true
-        enable_mixed_precision_training: true
-        finetune_epochs: 2
-        finetune_global_batch_size: 128
-        finetune_learning_rate: 2e-05
-        finetune_lr_scheduler_type: linear-warmup+cosine-decay
-        finetune_max_grad_norm: 1
-        finetune_max_steps: null
-        finetune_per_device_batch_size: 4
-        finetune_save_every_n_steps: 10000
-        finetune_train_strategy: fsdp-full-shard
-        finetune_warmup_ratio: 0.03
-        finetune_weight_decay: 0.1
-        image_resize_strategy: resize-naive
-        image_sequence_len: 1
-        llm_backbone_id: qwen25-3b-extra
-        llm_max_length: 32768
-        model_id: prism-qwen25-extra-dinosiglip-224px+3b
-        reduce_in_full_precision: false
-        type: prism-qwen25-extra-dinosiglip-224px+3b
-        vision_backbone_id: dinosiglip-vit-so-224px
-pretrained_checkpoint:
-    value: null
-run_id:
-    value: prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7
-run_root_dir:
-    value: runs
-seed:
-    value: 7
-stage:
-    value: finetune
-trackers:
-    value:
-        - jsonl
-        - wandb
-wandb_entity:
-    value: null
-wandb_project:
-    value: prismatic

wandb/run-20250808_235938-1zaivgpd/files/output.log DELETED Viewed

@@ -1,15 +0,0 @@
-[2;36m08/08 [23:59:40][0m[2;36m [0m[34mINFO    [0m | >> [1m[[0m*[1m][0m Starting Training Loop                                                                                                                                                                                                                                                                                                  ]8;id=176211;file:///fsx/byungjun/openvla-mini/scripts/pretrain.py\[2mpretrain.py[0m]8;;\[2m:[0m]8;id=640595;file:///fsx/byungjun/openvla-mini/scripts/pretrain.py#231\[2m231[0m]8;;\
-Traceback (most recent call last):
-  File "/fsx/byungjun/openvla-mini/scripts/pretrain.py", line 245, in <module>
-    pretrain()
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
-    response = fn(cfg, *args, **kwargs)
-  File "/fsx/byungjun/openvla-mini/scripts/pretrain.py", line 232, in pretrain
-    train_strategy.run_training(train_dataset, collator, metrics, stage=cfg.stage, seed=cfg.seed)
-  File "/fsx/byungjun/openvla-mini/prismatic/training/strategies/base_strategy.py", line 215, in run_training
-    normalized_loss.backward()
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/_tensor.py", line 522, in backward
-    torch.autograd.backward(
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/autograd/__init__.py", line 266, in backward
-    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
-KeyboardInterrupt

wandb/run-20250808_235938-1zaivgpd/files/requirements.txt DELETED Viewed

@@ -1,154 +0,0 @@
-nvidia-nvtx-cu12==12.1.105
-kiwisolver==1.4.8
-contourpy==1.3.2
-nvidia-cudnn-cu12==8.9.2.26
-tokenizers==0.19.1
-nvidia-cuda-runtime-cu12==12.1.105
-triton==2.2.0
-hf-xet==1.1.7
-mkl-service==2.4.0
-mkl_random==1.2.8
-pycparser==2.21
-ml-dtypes==0.2.0
-tensorflow==2.15.0
-nvidia-cufft-cu12==11.0.2.54
-pyasn1_modules==0.4.2
-numpy==1.26.4
-numpy==2.0.1
-mypy_extensions==1.1.0
-mkl_fft==1.3.11
-mdurl==0.1.2
-flash-attn==2.5.5
-six==1.17.0
-zipp==3.23.0
-dlimp==0.0.1
-json-numpy==2.1.1
-PySocks==1.7.1
-cffi==1.17.1
-Werkzeug==3.1.3
-rsa==4.9.1
-packaging==25.0
-draccus==0.8.0
-typing-inspection==0.4.1
-Markdown==3.8.2
-wandb==0.21.1
-trimesh==4.7.1
-Pygments==2.19.2
-pillow==11.3.0
-libclang==18.1.1
-typing-inspect==0.9.0
-attrs==25.3.0
-scipy==1.15.3
-scipy==1.11.2
-wrapt==1.14.1
-safetensors==0.6.2
-nvidia-curand-cu12==10.3.2.106
-etils==1.13.0
-OpenEXR==3.3.5
-smmap==5.0.2
-sentencepiece==0.1.99
-pyparsing==3.2.3
-astunparse==1.6.3
-opt_einsum==3.4.0
-tensorflow-graphics==2021.12.3
-fsspec==2025.7.0
-sympy==1.13.3
-timm==0.9.10
-pydantic==2.11.7
-tensorboard==2.15.2
-brotlicffi==1.0.9.2
-torch==2.2.0
-flatbuffers==25.2.10
-filelock==3.17.0
-click==8.2.1
-nvidia-cuda-cupti-cu12==12.1.105
-ninja==1.11.1.4
-typeguard==2.13.3
-nvidia-nccl-cu12==2.19.3
-openvla==0.0.3
-MarkupSafe==3.0.2
-rich==14.1.0
-nvidia-nvjitlink-cu12==12.9.86
-tensorflow-datasets==4.9.3
-tensorflow-io-gcs-filesystem==0.37.1
-networkx==3.4.2
-huggingface-hub==0.34.4
-absl-py==2.3.1
-nvidia-cublas-cu12==12.1.3.1
-torchaudio==2.2.0
-gmpy2==2.2.1
-array_record==0.7.2
-tensorflow-addons==0.23.0
-oauthlib==3.3.1
-PyYAML==6.0.2
-regex==2025.7.34
-nvidia-cuda-nvrtc-cu12==12.1.105
-setuptools==78.1.1
-toml==0.10.2
-google-auth==2.40.3
-certifi==2025.8.3
-keras==2.15.0
-torchvision==0.17.0
-grpcio==1.74.0
-fonttools==4.59.0
-transformers==4.40.1
-annotated-types==0.7.0
-charset-normalizer==3.3.2
-promise==2.3
-mergedeep==1.3.4
-gast==0.6.0
-cachetools==5.5.2
-termcolor==3.1.0
-pyyaml-include==1.4.1
-importlib_resources==6.5.2
-nvidia-cusolver-cu12==11.4.5.107
-h5py==3.14.0
-python-dateutil==2.9.0.post0
-peft==0.11.1
-urllib3==2.5.0
-einops==0.8.1
-tensorflow-estimator==2.15.0
-requests==2.32.4
-psutil==7.0.0
-requests-oauthlib==2.0.0
-pip==25.1
-markdown-it-py==3.0.0
-nvidia-cusparse-cu12==12.1.0.106
-idna==3.7
-tqdm==4.67.1
-dm-tree==0.1.9
-gitdb==4.0.12
-typing_extensions==4.12.2
-matplotlib==3.10.5
-accelerate==1.10.0
-tensorflow-metadata==1.17.2
-sentry-sdk==2.34.1
-jsonlines==4.0.0
-protobuf==4.21.12
-pyasn1==0.6.1
-google-pasta==0.2.0
-mpmath==1.3.0
-Jinja2==3.1.6
-tensorboard-data-server==0.7.2
-pydantic_core==2.33.2
-google-auth-oauthlib==1.2.2
-cycler==0.12.1
-platformdirs==4.3.8
-GitPython==3.1.45
-wheel==0.45.1
-backports.tarfile==1.2.0
-jaraco.collections==5.1.0
-autocommand==2.2.2
-typeguard==4.3.0
-tomli==2.0.1
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-wheel==0.45.1
-more-itertools==10.3.0
-inflect==7.3.1
-jaraco.context==5.3.0
-typing_extensions==4.12.2
-jaraco.functools==4.0.1
-packaging==24.2
-zipp==3.19.2
-jaraco.text==3.12.1

wandb/run-20250808_235938-1zaivgpd/files/wandb-metadata.json DELETED Viewed

@@ -1,149 +0,0 @@
-{
-  "os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.35",
-  "python": "CPython 3.10.18",
-  "startedAt": "2025-08-08T23:59:38.493368Z",
-  "args": [
-    "--model.type",
-    "prism-qwen25-extra-dinosiglip-224px+3b",
-    "--model.finetune_per_device_batch_size",
-    "4"
-  ],
-  "program": "/fsx/byungjun/openvla-mini/scripts/pretrain.py",
-  "codePath": "scripts/pretrain.py",
-  "codePathLocal": "scripts/pretrain.py",
-  "git": {
-    "remote": "https://github.com/happyhappy-jun/openvla-mini",
-    "commit": "0822b36227b5a771be4eb2680e34c559734c8fdc"
-  },
-  "email": "bjyoon513@gmail.com",
-  "root": "runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7",
-  "host": "compute-st-kait-gpu-2",
-  "executable": "/fsx/byungjun/miniconda3/envs/minivla/bin/python3.10",
-  "cpu_count": 96,
-  "cpu_count_logical": 192,
-  "gpu": "NVIDIA H200",
-  "gpu_count": 8,
-  "disk": {
-    "/": {
-      "total": "520120602624",
-      "used": "64344903680"
-    }
-  },
-  "memory": {
-    "total": "2147425312768"
-  },
-  "gpu_nvidia": [
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-95044091-c6a6-4e9d-26a3-0249feeaf796"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-e54a8b43-5dd9-a2f8-8a71-b254a12248ec"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-daed9c7c-6f35-ec0c-abd5-49e1f7d48645"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-acf2a7ee-d8a1-bb8c-be49-c7a07c0f07da"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-0245a021-19ca-991a-61b0-94cbc116d182"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-4213a83d-27d3-97d3-0cec-f9700637d48c"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-8be9f5c6-a214-8b33-0ac2-217892edfa6f"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-5c5fce07-faf7-1345-d5ea-4c13e75769e7"
-    }
-  ],
-  "cudaVersion": "12.8",
-  "slurm": {
-    "cluster_name": "kait-gpu-06-parallelcluster",
-    "conf": "/opt/slurm/etc/slurm.conf",
-    "cpu_bind": "quiet,mask_cpu:0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000",
-    "cpu_bind_list": "0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000",
-    "cpu_bind_type": "mask_cpu:",
-    "cpu_bind_verbose": "quiet",
-    "cpus_on_node": "128",
-    "gpus": "8",
-    "gpus_on_node": "8",
-    "gtids": "0",
-    "job_cpus_per_node": "128",
-    "job_end_time": "1786233065",
-    "job_gid": "1004",
-    "job_group": "byungjun",
-    "job_id": "527",
-    "job_name": "bash",
-    "job_nodelist": "compute-st-kait-gpu-2",
-    "job_num_nodes": "1",
-    "job_partition": "batch2",
-    "job_start_time": "1754697065",
-    "job_uid": "1004",
-    "job_user": "byungjun",
-    "jobid": "527",
-    "launch_node_ipaddr": "10.10.47.245",
-    "localid": "0",
-    "nnodes": "1",
-    "nodeid": "0",
-    "nodelist": "compute-st-kait-gpu-2",
-    "nprocs": "1",
-    "ntasks": "1",
-    "prio_process": "0",
-    "procid": "0",
-    "pty_port": "36537",
-    "pty_win_col": "362",
-    "pty_win_row": "84",
-    "srun_comm_host": "10.10.47.245",
-    "srun_comm_port": "45601",
-    "step_gpus": "0,1,2,3,4,5,6,7",
-    "step_id": "0",
-    "step_launcher_port": "45601",
-    "step_nodelist": "compute-st-kait-gpu-2",
-    "step_num_nodes": "1",
-    "step_num_tasks": "1",
-    "step_tasks_per_node": "1",
-    "stepid": "0",
-    "submit_dir": "/fsx/byungjun/openvla-mini",
-    "submit_host": "ip-10-10-47-245",
-    "task_pid": "299864",
-    "tasks_per_node": "1",
-    "topology_addr": "compute-st-kait-gpu-2",
-    "topology_addr_pattern": "node",
-    "umask": "0002"
-  },
-  "writerId": "dlllufpnqzysennzlwkr1rfukzvy018p"
-}

wandb/run-20250808_235938-1zaivgpd/files/wandb-summary.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"Finetune/Loss (Raw)":1.1874206066131592,"_runtime":315.461188507,"Finetune/Step":139,"Finetune/Step Time":2.147124085575342,"_wandb":{"runtime":315},"_step":139,"Finetune/Loss":1.376694917678833,"Finetune/Learning Rate":8.938906752411576e-06,"_timestamp":1.754697891733366e+09}

wandb/run-20250808_235938-1zaivgpd/logs/debug-core.log DELETED Viewed

@@ -1,15 +0,0 @@
-{"time":"2025-08-08T23:59:38.818964529Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpg7m_uafm/port-303339.txt","pid":303339,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
-{"time":"2025-08-08T23:59:38.820143837Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":303339}
-{"time":"2025-08-08T23:59:38.820124086Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-303339-306877-1688353627/socket","Net":"unix"}}
-{"time":"2025-08-08T23:59:38.881306717Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
-{"time":"2025-08-08T23:59:38.908237591Z","level":"INFO","msg":"handleInformInit: received","streamId":"1zaivgpd","id":"1(@)"}
-{"time":"2025-08-08T23:59:39.35258466Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"1zaivgpd","id":"1(@)"}
-{"time":"2025-08-09T00:04:55.173312507Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
-{"time":"2025-08-09T00:04:55.175139044Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-08-09T00:04:55.175126277Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
-{"time":"2025-08-09T00:04:55.175287385Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
-{"time":"2025-08-09T00:04:55.175280812Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-303339-306877-1688353627/socket","Net":"unix"}}
-{"time":"2025-08-09T00:04:55.453401756Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write unix /tmp/wandb-303339-306877-1688353627/socket->@: use of closed network connection","id":"1(@)"}
-{"time":"2025-08-09T00:04:56.351204608Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
-{"time":"2025-08-09T00:04:56.351239091Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
-{"time":"2025-08-09T00:04:56.351249091Z","level":"INFO","msg":"server is closed"}

wandb/run-20250808_235938-1zaivgpd/logs/debug-internal.log DELETED Viewed

@@ -1,11 +0,0 @@
-{"time":"2025-08-08T23:59:38.909835986Z","level":"INFO","msg":"stream: starting","core version":"0.21.1"}
-{"time":"2025-08-08T23:59:39.352490498Z","level":"INFO","msg":"stream: created new stream","id":"1zaivgpd"}
-{"time":"2025-08-08T23:59:39.352575985Z","level":"INFO","msg":"stream: started","id":"1zaivgpd"}
-{"time":"2025-08-08T23:59:39.352597975Z","level":"INFO","msg":"writer: started","stream_id":"1zaivgpd"}
-{"time":"2025-08-08T23:59:39.35262014Z","level":"INFO","msg":"handler: started","stream_id":"1zaivgpd"}
-{"time":"2025-08-08T23:59:39.352635811Z","level":"INFO","msg":"sender: started","stream_id":"1zaivgpd"}
-{"time":"2025-08-09T00:04:55.17512262Z","level":"INFO","msg":"stream: closing","id":"1zaivgpd"}
-{"time":"2025-08-09T00:04:56.108748537Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-08-09T00:04:56.336618603Z","level":"INFO","msg":"handler: closed","stream_id":"1zaivgpd"}
-{"time":"2025-08-09T00:04:56.337591218Z","level":"INFO","msg":"sender: closed","stream_id":"1zaivgpd"}
-{"time":"2025-08-09T00:04:56.337616655Z","level":"INFO","msg":"stream: closed","id":"1zaivgpd"}

wandb/run-20250808_235938-1zaivgpd/logs/debug.log DELETED Viewed

@@ -1,22 +0,0 @@
-2025-08-08 23:59:38,580 INFO    MainThread:303339 [wandb_setup.py:_flush():80] Current SDK version is 0.21.1
-2025-08-08 23:59:38,582 INFO    MainThread:303339 [wandb_setup.py:_flush():80] Configure stats pid to 303339
-2025-08-08 23:59:38,582 INFO    MainThread:303339 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/.config/wandb/settings
-2025-08-08 23:59:38,582 INFO    MainThread:303339 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/openvla-mini/wandb/settings
-2025-08-08 23:59:38,582 INFO    MainThread:303339 [wandb_setup.py:_flush():80] Loading settings from environment variables
-2025-08-08 23:59:38,583 INFO    MainThread:303339 [wandb_init.py:setup_run_log_directory():703] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7/wandb/run-20250808_235938-1zaivgpd/logs/debug.log
-2025-08-08 23:59:38,583 INFO    MainThread:303339 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7/wandb/run-20250808_235938-1zaivgpd/logs/debug-internal.log
-2025-08-08 23:59:38,583 INFO    MainThread:303339 [wandb_init.py:init():830] calling init triggers
-2025-08-08 23:59:38,583 INFO    MainThread:303339 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
-config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+3b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+3b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-3b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'image_sequence_len': 1, 'align_epochs': 1, 'align_max_steps': None, 'align_save_every_n_steps': 10000, 'align_global_batch_size': 96, 'align_per_device_batch_size': 16, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_save_every_n_steps': 10000, 'finetune_global_batch_size': 128, 'finetune_per_device_batch_size': 4, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data2'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'prismatic', 'wandb_entity': None, '_wandb': {}}
-2025-08-08 23:59:38,583 INFO    MainThread:303339 [wandb_init.py:init():871] starting backend
-2025-08-08 23:59:38,881 INFO    MainThread:303339 [wandb_init.py:init():874] sending inform_init request
-2025-08-08 23:59:38,906 INFO    MainThread:303339 [wandb_init.py:init():882] backend started and connected
-2025-08-08 23:59:38,910 INFO    MainThread:303339 [wandb_init.py:init():953] updated telemetry
-2025-08-08 23:59:38,936 INFO    MainThread:303339 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
-2025-08-08 23:59:39,705 INFO    MainThread:303339 [wandb_init.py:init():1029] starting run threads in backend
-2025-08-08 23:59:40,160 INFO    MainThread:303339 [wandb_run.py:_console_start():2494] atexit reg
-2025-08-08 23:59:40,160 INFO    MainThread:303339 [wandb_run.py:_redirect():2342] redirect: wrap_raw
-2025-08-08 23:59:40,161 INFO    MainThread:303339 [wandb_run.py:_redirect():2411] Wrapping output streams.
-2025-08-08 23:59:40,161 INFO    MainThread:303339 [wandb_run.py:_redirect():2434] Redirects installed.
-2025-08-08 23:59:40,182 INFO    MainThread:303339 [wandb_init.py:init():1075] run started, returning control to user process
-2025-08-09 00:04:55,170 INFO    MsgRouterThr:303339 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 2 handles.

wandb/run-20250808_235938-1zaivgpd/run-1zaivgpd.wandb DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7420f99144bacccd6ae63f4fd694e0216260b0d5982b593d758940c5c5ead903
-size 289299

wandb/run-20250809_000914-ttc1ybny/files/config.yaml DELETED Viewed

@@ -1,225 +0,0 @@
-_wandb:
-    value:
-        cli_version: 0.21.1
-        e:
-            3edib02cci947rvwid7aggk7s2h7y6r3:
-                args:
-                    - --model.type
-                    - prism-qwen25-extra-dinosiglip-224px+3b
-                    - --model.finetune_per_device_batch_size
-                    - "8"
-                codePath: scripts/pretrain.py
-                codePathLocal: scripts/pretrain.py
-                cpu_count: 96
-                cpu_count_logical: 192
-                cudaVersion: "12.8"
-                disk:
-                    /:
-                        total: "520120602624"
-                        used: "64347447296"
-                email: bjyoon513@gmail.com
-                executable: /fsx/byungjun/miniconda3/envs/minivla/bin/python3.10
-                git:
-                    commit: 0822b36227b5a771be4eb2680e34c559734c8fdc
-                    remote: https://github.com/happyhappy-jun/openvla-mini
-                gpu: NVIDIA H200
-                gpu_count: 8
-                gpu_nvidia:
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-95044091-c6a6-4e9d-26a3-0249feeaf796
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-e54a8b43-5dd9-a2f8-8a71-b254a12248ec
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-daed9c7c-6f35-ec0c-abd5-49e1f7d48645
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-acf2a7ee-d8a1-bb8c-be49-c7a07c0f07da
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-0245a021-19ca-991a-61b0-94cbc116d182
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-4213a83d-27d3-97d3-0cec-f9700637d48c
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-8be9f5c6-a214-8b33-0ac2-217892edfa6f
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-5c5fce07-faf7-1345-d5ea-4c13e75769e7
-                host: compute-st-kait-gpu-2
-                memory:
-                    total: "2147425312768"
-                os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.35
-                program: /fsx/byungjun/openvla-mini/scripts/pretrain.py
-                python: CPython 3.10.18
-                root: runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7
-                slurm:
-                    cluster_name: kait-gpu-06-parallelcluster
-                    conf: /opt/slurm/etc/slurm.conf
-                    cpu_bind: quiet,mask_cpu:0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000
-                    cpu_bind_list: 0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000
-                    cpu_bind_type: 'mask_cpu:'
-                    cpu_bind_verbose: quiet
-                    cpus_on_node: "128"
-                    gpus: "8"
-                    gpus_on_node: "8"
-                    gtids: "0"
-                    job_cpus_per_node: "128"
-                    job_end_time: "1786233065"
-                    job_gid: "1004"
-                    job_group: byungjun
-                    job_id: "527"
-                    job_name: bash
-                    job_nodelist: compute-st-kait-gpu-2
-                    job_num_nodes: "1"
-                    job_partition: batch2
-                    job_start_time: "1754697065"
-                    job_uid: "1004"
-                    job_user: byungjun
-                    jobid: "527"
-                    launch_node_ipaddr: 10.10.47.245
-                    localid: "0"
-                    nnodes: "1"
-                    nodeid: "0"
-                    nodelist: compute-st-kait-gpu-2
-                    nprocs: "1"
-                    ntasks: "1"
-                    prio_process: "0"
-                    procid: "0"
-                    pty_port: "36537"
-                    pty_win_col: "362"
-                    pty_win_row: "84"
-                    srun_comm_host: 10.10.47.245
-                    srun_comm_port: "45601"
-                    step_gpus: 0,1,2,3,4,5,6,7
-                    step_id: "0"
-                    step_launcher_port: "45601"
-                    step_nodelist: compute-st-kait-gpu-2
-                    step_num_nodes: "1"
-                    step_num_tasks: "1"
-                    step_tasks_per_node: "1"
-                    stepid: "0"
-                    submit_dir: /fsx/byungjun/openvla-mini
-                    submit_host: ip-10-10-47-245
-                    task_pid: "299864"
-                    tasks_per_node: "1"
-                    topology_addr: compute-st-kait-gpu-2
-                    topology_addr_pattern: node
-                    umask: "0002"
-                startedAt: "2025-08-09T00:09:14.729065Z"
-                writerId: 3edib02cci947rvwid7aggk7s2h7y6r3
-        m: []
-        python_version: 3.10.18
-        t:
-            "1":
-                - 1
-                - 2
-                - 3
-                - 11
-                - 41
-                - 49
-                - 63
-                - 71
-            "2":
-                - 1
-                - 2
-                - 3
-                - 11
-                - 41
-                - 49
-                - 63
-                - 71
-            "3":
-                - 13
-                - 16
-                - 61
-            "4": 3.10.18
-            "5": 0.21.1
-            "6": 4.40.1
-            "12": 0.21.1
-            "13": linux-x86_64
-dataset:
-    value:
-        align_stage_components:
-            - download/llava-laion-cc-sbu-558k/chat.json
-            - download/llava-laion-cc-sbu-558k
-        dataset_id: llava-v15
-        dataset_root_dir: data2
-        finetune_stage_components:
-            - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
-            - download/llava-v1.5-instruct
-        type: llava-v15
-hf_token:
-    value: .hf_token
-model:
-    value:
-        align_epochs: 1
-        align_global_batch_size: 96
-        align_learning_rate: 0.001
-        align_lr_scheduler_type: linear-warmup+cosine-decay
-        align_max_grad_norm: 1
-        align_max_steps: null
-        align_per_device_batch_size: 16
-        align_save_every_n_steps: 10000
-        align_train_strategy: fsdp-shard-grad-op
-        align_warmup_ratio: 0.03
-        align_weight_decay: 0
-        arch_specifier: no-align+fused-gelu-mlp
-        enable_gradient_checkpointing: true
-        enable_mixed_precision_training: true
-        finetune_epochs: 2
-        finetune_global_batch_size: 128
-        finetune_learning_rate: 2e-05
-        finetune_lr_scheduler_type: linear-warmup+cosine-decay
-        finetune_max_grad_norm: 1
-        finetune_max_steps: null
-        finetune_per_device_batch_size: 8
-        finetune_save_every_n_steps: 10000
-        finetune_train_strategy: fsdp-full-shard
-        finetune_warmup_ratio: 0.03
-        finetune_weight_decay: 0.1
-        image_resize_strategy: resize-naive
-        image_sequence_len: 1
-        llm_backbone_id: qwen25-3b-extra
-        llm_max_length: 32768
-        model_id: prism-qwen25-extra-dinosiglip-224px+3b
-        reduce_in_full_precision: false
-        type: prism-qwen25-extra-dinosiglip-224px+3b
-        vision_backbone_id: dinosiglip-vit-so-224px
-pretrained_checkpoint:
-    value: null
-run_id:
-    value: prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7
-run_root_dir:
-    value: runs
-seed:
-    value: 7
-stage:
-    value: finetune
-trackers:
-    value:
-        - jsonl
-        - wandb
-wandb_entity:
-    value: null
-wandb_project:
-    value: prismatic

wandb/run-20250809_000914-ttc1ybny/files/output.log DELETED Viewed

@@ -1,77 +0,0 @@
-[2;36m08/09 [00:09:16][0m[2;36m [0m[34mINFO    [0m | >> [1m[[0m*[1m][0m Starting Training Loop                                                                                                                                                                                                                                                                                                  ]8;id=176211;file:///fsx/byungjun/openvla-mini/scripts/pretrain.py\[2mpretrain.py[0m]8;;\[2m:[0m]8;id=640595;file:///fsx/byungjun/openvla-mini/scripts/pretrain.py#231\[2m231[0m]8;;\
-Traceback (most recent call last):
-  File "/fsx/byungjun/openvla-mini/scripts/pretrain.py", line 245, in <module>
-    pretrain()
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
-    response = fn(cfg, *args, **kwargs)
-  File "/fsx/byungjun/openvla-mini/scripts/pretrain.py", line 232, in pretrain
-    train_strategy.run_training(train_dataset, collator, metrics, stage=cfg.stage, seed=cfg.seed)
-  File "/fsx/byungjun/openvla-mini/prismatic/training/strategies/base_strategy.py", line 190, in run_training
-    output: CausalLMOutputWithPast = self.vlm(
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
-    return forward_call(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
-    output = self._fsdp_wrapped_module(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
-    return forward_call(*args, **kwargs)
-  File "/fsx/byungjun/openvla-mini/prismatic/models/vlms/prismatic.py", line 470, in forward
-    return self.llm_backbone(
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
-    return forward_call(*args, **kwargs)
-  File "/fsx/byungjun/openvla-mini/prismatic/models/backbones/llm/base_llm.py", line 222, in forward
-    output: CausalLMOutputWithPast = self.llm(
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
-    return forward_call(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 1169, in forward
-    outputs = self.model(
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
-    return forward_call(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 1054, in forward
-    layer_outputs = decoder_layer(
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
-    return forward_call(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
-    output = self._fsdp_wrapped_module(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
-    return forward_call(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/distributed/algorithms/_checkpoint/checkpoint_wrapper.py", line 168, in forward
-    return self.checkpoint_fn(  # type: ignore[misc]
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/_compile.py", line 24, in inner
-    return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
-    return fn(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
-    return fn(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 489, in checkpoint
-    ret = function(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
-    return forward_call(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 765, in forward
-    hidden_states = self.input_layernorm(hidden_states)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
-    return self._call_impl(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
-    return forward_call(*args, **kwargs)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 89, in forward
-    hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 1094, in pack_hook
-    with torch.no_grad():
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/autograd/grad_mode.py", line 76, in __init__
-    super().__init__()
-KeyboardInterrupt

wandb/run-20250809_000914-ttc1ybny/files/requirements.txt DELETED Viewed

@@ -1,154 +0,0 @@
-nvidia-nvtx-cu12==12.1.105
-kiwisolver==1.4.8
-contourpy==1.3.2
-nvidia-cudnn-cu12==8.9.2.26
-tokenizers==0.19.1
-nvidia-cuda-runtime-cu12==12.1.105
-triton==2.2.0
-hf-xet==1.1.7
-mkl-service==2.4.0
-mkl_random==1.2.8
-pycparser==2.21
-ml-dtypes==0.2.0
-tensorflow==2.15.0
-nvidia-cufft-cu12==11.0.2.54
-pyasn1_modules==0.4.2
-numpy==1.26.4
-numpy==2.0.1
-mypy_extensions==1.1.0
-mkl_fft==1.3.11
-mdurl==0.1.2
-flash-attn==2.5.5
-six==1.17.0
-zipp==3.23.0
-dlimp==0.0.1
-json-numpy==2.1.1
-PySocks==1.7.1
-cffi==1.17.1
-Werkzeug==3.1.3
-rsa==4.9.1
-packaging==25.0
-draccus==0.8.0
-typing-inspection==0.4.1
-Markdown==3.8.2
-wandb==0.21.1
-trimesh==4.7.1
-Pygments==2.19.2
-pillow==11.3.0
-libclang==18.1.1
-typing-inspect==0.9.0
-attrs==25.3.0
-scipy==1.15.3
-scipy==1.11.2
-wrapt==1.14.1
-safetensors==0.6.2
-nvidia-curand-cu12==10.3.2.106
-etils==1.13.0
-OpenEXR==3.3.5
-smmap==5.0.2
-sentencepiece==0.1.99
-pyparsing==3.2.3
-astunparse==1.6.3
-opt_einsum==3.4.0
-tensorflow-graphics==2021.12.3
-fsspec==2025.7.0
-sympy==1.13.3
-timm==0.9.10
-pydantic==2.11.7
-tensorboard==2.15.2
-brotlicffi==1.0.9.2
-torch==2.2.0
-flatbuffers==25.2.10
-filelock==3.17.0
-click==8.2.1
-nvidia-cuda-cupti-cu12==12.1.105
-ninja==1.11.1.4
-typeguard==2.13.3
-nvidia-nccl-cu12==2.19.3
-openvla==0.0.3
-MarkupSafe==3.0.2
-rich==14.1.0
-nvidia-nvjitlink-cu12==12.9.86
-tensorflow-datasets==4.9.3
-tensorflow-io-gcs-filesystem==0.37.1
-networkx==3.4.2
-huggingface-hub==0.34.4
-absl-py==2.3.1
-nvidia-cublas-cu12==12.1.3.1
-torchaudio==2.2.0
-gmpy2==2.2.1
-array_record==0.7.2
-tensorflow-addons==0.23.0
-oauthlib==3.3.1
-PyYAML==6.0.2
-regex==2025.7.34
-nvidia-cuda-nvrtc-cu12==12.1.105
-setuptools==78.1.1
-toml==0.10.2
-google-auth==2.40.3
-certifi==2025.8.3
-keras==2.15.0
-torchvision==0.17.0
-grpcio==1.74.0
-fonttools==4.59.0
-transformers==4.40.1
-annotated-types==0.7.0
-charset-normalizer==3.3.2
-promise==2.3
-mergedeep==1.3.4
-gast==0.6.0
-cachetools==5.5.2
-termcolor==3.1.0
-pyyaml-include==1.4.1
-importlib_resources==6.5.2
-nvidia-cusolver-cu12==11.4.5.107
-h5py==3.14.0
-python-dateutil==2.9.0.post0
-peft==0.11.1
-urllib3==2.5.0
-einops==0.8.1
-tensorflow-estimator==2.15.0
-requests==2.32.4
-psutil==7.0.0
-requests-oauthlib==2.0.0
-pip==25.1
-markdown-it-py==3.0.0
-nvidia-cusparse-cu12==12.1.0.106
-idna==3.7
-tqdm==4.67.1
-dm-tree==0.1.9
-gitdb==4.0.12
-typing_extensions==4.12.2
-matplotlib==3.10.5
-accelerate==1.10.0
-tensorflow-metadata==1.17.2
-sentry-sdk==2.34.1
-jsonlines==4.0.0
-protobuf==4.21.12
-pyasn1==0.6.1
-google-pasta==0.2.0
-mpmath==1.3.0
-Jinja2==3.1.6
-tensorboard-data-server==0.7.2
-pydantic_core==2.33.2
-google-auth-oauthlib==1.2.2
-cycler==0.12.1
-platformdirs==4.3.8
-GitPython==3.1.45
-wheel==0.45.1
-backports.tarfile==1.2.0
-jaraco.collections==5.1.0
-autocommand==2.2.2
-typeguard==4.3.0
-tomli==2.0.1
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-wheel==0.45.1
-more-itertools==10.3.0
-inflect==7.3.1
-jaraco.context==5.3.0
-typing_extensions==4.12.2
-jaraco.functools==4.0.1
-packaging==24.2
-zipp==3.19.2
-jaraco.text==3.12.1

wandb/run-20250809_000914-ttc1ybny/files/wandb-metadata.json DELETED Viewed

@@ -1,149 +0,0 @@
-{
-  "os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.35",
-  "python": "CPython 3.10.18",
-  "startedAt": "2025-08-09T00:09:14.729065Z",
-  "args": [
-    "--model.type",
-    "prism-qwen25-extra-dinosiglip-224px+3b",
-    "--model.finetune_per_device_batch_size",
-    "8"
-  ],
-  "program": "/fsx/byungjun/openvla-mini/scripts/pretrain.py",
-  "codePath": "scripts/pretrain.py",
-  "codePathLocal": "scripts/pretrain.py",
-  "git": {
-    "remote": "https://github.com/happyhappy-jun/openvla-mini",
-    "commit": "0822b36227b5a771be4eb2680e34c559734c8fdc"
-  },
-  "email": "bjyoon513@gmail.com",
-  "root": "runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7",
-  "host": "compute-st-kait-gpu-2",
-  "executable": "/fsx/byungjun/miniconda3/envs/minivla/bin/python3.10",
-  "cpu_count": 96,
-  "cpu_count_logical": 192,
-  "gpu": "NVIDIA H200",
-  "gpu_count": 8,
-  "disk": {
-    "/": {
-      "total": "520120602624",
-      "used": "64347447296"
-    }
-  },
-  "memory": {
-    "total": "2147425312768"
-  },
-  "gpu_nvidia": [
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-95044091-c6a6-4e9d-26a3-0249feeaf796"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-e54a8b43-5dd9-a2f8-8a71-b254a12248ec"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-daed9c7c-6f35-ec0c-abd5-49e1f7d48645"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-acf2a7ee-d8a1-bb8c-be49-c7a07c0f07da"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-0245a021-19ca-991a-61b0-94cbc116d182"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-4213a83d-27d3-97d3-0cec-f9700637d48c"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-8be9f5c6-a214-8b33-0ac2-217892edfa6f"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-5c5fce07-faf7-1345-d5ea-4c13e75769e7"
-    }
-  ],
-  "cudaVersion": "12.8",
-  "slurm": {
-    "cluster_name": "kait-gpu-06-parallelcluster",
-    "conf": "/opt/slurm/etc/slurm.conf",
-    "cpu_bind": "quiet,mask_cpu:0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000",
-    "cpu_bind_list": "0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000",
-    "cpu_bind_type": "mask_cpu:",
-    "cpu_bind_verbose": "quiet",
-    "cpus_on_node": "128",
-    "gpus": "8",
-    "gpus_on_node": "8",
-    "gtids": "0",
-    "job_cpus_per_node": "128",
-    "job_end_time": "1786233065",
-    "job_gid": "1004",
-    "job_group": "byungjun",
-    "job_id": "527",
-    "job_name": "bash",
-    "job_nodelist": "compute-st-kait-gpu-2",
-    "job_num_nodes": "1",
-    "job_partition": "batch2",
-    "job_start_time": "1754697065",
-    "job_uid": "1004",
-    "job_user": "byungjun",
-    "jobid": "527",
-    "launch_node_ipaddr": "10.10.47.245",
-    "localid": "0",
-    "nnodes": "1",
-    "nodeid": "0",
-    "nodelist": "compute-st-kait-gpu-2",
-    "nprocs": "1",
-    "ntasks": "1",
-    "prio_process": "0",
-    "procid": "0",
-    "pty_port": "36537",
-    "pty_win_col": "362",
-    "pty_win_row": "84",
-    "srun_comm_host": "10.10.47.245",
-    "srun_comm_port": "45601",
-    "step_gpus": "0,1,2,3,4,5,6,7",
-    "step_id": "0",
-    "step_launcher_port": "45601",
-    "step_nodelist": "compute-st-kait-gpu-2",
-    "step_num_nodes": "1",
-    "step_num_tasks": "1",
-    "step_tasks_per_node": "1",
-    "stepid": "0",
-    "submit_dir": "/fsx/byungjun/openvla-mini",
-    "submit_host": "ip-10-10-47-245",
-    "task_pid": "299864",
-    "tasks_per_node": "1",
-    "topology_addr": "compute-st-kait-gpu-2",
-    "topology_addr_pattern": "node",
-    "umask": "0002"
-  },
-  "writerId": "3edib02cci947rvwid7aggk7s2h7y6r3"
-}

wandb/run-20250809_000914-ttc1ybny/files/wandb-summary.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"_wandb":{"runtime":236},"_runtime":236.322617231,"Finetune/Step":167,"Finetune/Loss":1.3435916900634766,"Finetune/Learning Rate":1.0739549839228296e-05,"_step":167,"Finetune/Step Time":1.3200225681066513,"Finetune/Loss (Raw)":1.0089986324310303,"_timestamp":1.7546983897202375e+09}

wandb/run-20250809_000914-ttc1ybny/logs/debug-core.log DELETED Viewed

@@ -1,14 +0,0 @@
-{"time":"2025-08-09T00:09:14.998998464Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmppz5qe2qs/port-310696.txt","pid":310696,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
-{"time":"2025-08-09T00:09:15.000519302Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":310696}
-{"time":"2025-08-09T00:09:15.000521507Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-310696-312789-804370854/socket","Net":"unix"}}
-{"time":"2025-08-09T00:09:15.058183028Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
-{"time":"2025-08-09T00:09:15.071139886Z","level":"INFO","msg":"handleInformInit: received","streamId":"ttc1ybny","id":"1(@)"}
-{"time":"2025-08-09T00:09:15.519038972Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"ttc1ybny","id":"1(@)"}
-{"time":"2025-08-09T00:13:12.129122821Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
-{"time":"2025-08-09T00:13:12.130510275Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-08-09T00:13:12.130613294Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-310696-312789-804370854/socket","Net":"unix"}}
-{"time":"2025-08-09T00:13:12.130494733Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
-{"time":"2025-08-09T00:13:12.130698195Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
-{"time":"2025-08-09T00:13:12.944751942Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
-{"time":"2025-08-09T00:13:12.944772087Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
-{"time":"2025-08-09T00:13:12.944783918Z","level":"INFO","msg":"server is closed"}

wandb/run-20250809_000914-ttc1ybny/logs/debug-internal.log DELETED Viewed

@@ -1,11 +0,0 @@
-{"time":"2025-08-09T00:09:15.073794487Z","level":"INFO","msg":"stream: starting","core version":"0.21.1"}
-{"time":"2025-08-09T00:09:15.518973292Z","level":"INFO","msg":"stream: created new stream","id":"ttc1ybny"}
-{"time":"2025-08-09T00:09:15.519031018Z","level":"INFO","msg":"stream: started","id":"ttc1ybny"}
-{"time":"2025-08-09T00:09:15.519059671Z","level":"INFO","msg":"writer: started","stream_id":"ttc1ybny"}
-{"time":"2025-08-09T00:09:15.519071384Z","level":"INFO","msg":"handler: started","stream_id":"ttc1ybny"}
-{"time":"2025-08-09T00:09:15.519090603Z","level":"INFO","msg":"sender: started","stream_id":"ttc1ybny"}
-{"time":"2025-08-09T00:13:12.130505394Z","level":"INFO","msg":"stream: closing","id":"ttc1ybny"}
-{"time":"2025-08-09T00:13:12.667114203Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-08-09T00:13:12.930183192Z","level":"INFO","msg":"handler: closed","stream_id":"ttc1ybny"}
-{"time":"2025-08-09T00:13:12.931064677Z","level":"INFO","msg":"sender: closed","stream_id":"ttc1ybny"}
-{"time":"2025-08-09T00:13:12.931082577Z","level":"INFO","msg":"stream: closed","id":"ttc1ybny"}

wandb/run-20250809_000914-ttc1ybny/logs/debug.log DELETED Viewed

@@ -1,22 +0,0 @@
-2025-08-09 00:09:14,807 INFO    MainThread:310696 [wandb_setup.py:_flush():80] Current SDK version is 0.21.1
-2025-08-09 00:09:14,808 INFO    MainThread:310696 [wandb_setup.py:_flush():80] Configure stats pid to 310696
-2025-08-09 00:09:14,808 INFO    MainThread:310696 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/.config/wandb/settings
-2025-08-09 00:09:14,808 INFO    MainThread:310696 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/openvla-mini/wandb/settings
-2025-08-09 00:09:14,808 INFO    MainThread:310696 [wandb_setup.py:_flush():80] Loading settings from environment variables
-2025-08-09 00:09:14,808 INFO    MainThread:310696 [wandb_init.py:setup_run_log_directory():703] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7/wandb/run-20250809_000914-ttc1ybny/logs/debug.log
-2025-08-09 00:09:14,808 INFO    MainThread:310696 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7/wandb/run-20250809_000914-ttc1ybny/logs/debug-internal.log
-2025-08-09 00:09:14,808 INFO    MainThread:310696 [wandb_init.py:init():830] calling init triggers
-2025-08-09 00:09:14,808 INFO    MainThread:310696 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
-config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+3b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+3b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-3b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'image_sequence_len': 1, 'align_epochs': 1, 'align_max_steps': None, 'align_save_every_n_steps': 10000, 'align_global_batch_size': 96, 'align_per_device_batch_size': 16, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_save_every_n_steps': 10000, 'finetune_global_batch_size': 128, 'finetune_per_device_batch_size': 8, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data2'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'prismatic', 'wandb_entity': None, '_wandb': {}}
-2025-08-09 00:09:14,809 INFO    MainThread:310696 [wandb_init.py:init():871] starting backend
-2025-08-09 00:09:15,058 INFO    MainThread:310696 [wandb_init.py:init():874] sending inform_init request
-2025-08-09 00:09:15,069 INFO    MainThread:310696 [wandb_init.py:init():882] backend started and connected
-2025-08-09 00:09:15,073 INFO    MainThread:310696 [wandb_init.py:init():953] updated telemetry
-2025-08-09 00:09:15,109 INFO    MainThread:310696 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
-2025-08-09 00:09:15,806 INFO    MainThread:310696 [wandb_init.py:init():1029] starting run threads in backend
-2025-08-09 00:09:16,420 INFO    MainThread:310696 [wandb_run.py:_console_start():2494] atexit reg
-2025-08-09 00:09:16,420 INFO    MainThread:310696 [wandb_run.py:_redirect():2342] redirect: wrap_raw
-2025-08-09 00:09:16,420 INFO    MainThread:310696 [wandb_run.py:_redirect():2411] Wrapping output streams.
-2025-08-09 00:09:16,420 INFO    MainThread:310696 [wandb_run.py:_redirect():2434] Redirects installed.
-2025-08-09 00:09:16,435 INFO    MainThread:310696 [wandb_init.py:init():1075] run started, returning control to user process
-2025-08-09 00:13:12,126 INFO    MsgRouterThr:310696 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.

wandb/run-20250809_000914-ttc1ybny/run-ttc1ybny.wandb DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d9f44546feb9e553b748a1f2a2066c2d29d824020c23458a2542027b9fbd9335
-size 317821

wandb/run-20250809_002817-g4nrjez0/files/config.yaml DELETED Viewed

@@ -1,225 +0,0 @@
-_wandb:
-    value:
-        cli_version: 0.21.1
-        e:
-            vllu22bcqlllmyuwxuzw6uvs6d6got8z:
-                args:
-                    - --model.type
-                    - prism-qwen25-extra-dinosiglip-224px+7b
-                    - --model.finetune_per_device_batch_size
-                    - "8"
-                codePath: scripts/pretrain.py
-                codePathLocal: scripts/pretrain.py
-                cpu_count: 96
-                cpu_count_logical: 192
-                cudaVersion: "12.8"
-                disk:
-                    /:
-                        total: "520120602624"
-                        used: "64347758592"
-                email: bjyoon513@gmail.com
-                executable: /fsx/byungjun/miniconda3/envs/minivla/bin/python3.10
-                git:
-                    commit: 1441372f4af4f91d0e99c9a104d10536d8ad566d
-                    remote: https://github.com/happyhappy-jun/openvla-mini
-                gpu: NVIDIA H200
-                gpu_count: 8
-                gpu_nvidia:
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-95044091-c6a6-4e9d-26a3-0249feeaf796
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-e54a8b43-5dd9-a2f8-8a71-b254a12248ec
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-daed9c7c-6f35-ec0c-abd5-49e1f7d48645
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-acf2a7ee-d8a1-bb8c-be49-c7a07c0f07da
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-0245a021-19ca-991a-61b0-94cbc116d182
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-4213a83d-27d3-97d3-0cec-f9700637d48c
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-8be9f5c6-a214-8b33-0ac2-217892edfa6f
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-5c5fce07-faf7-1345-d5ea-4c13e75769e7
-                host: compute-st-kait-gpu-2
-                memory:
-                    total: "2147425312768"
-                os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.35
-                program: /fsx/byungjun/openvla-mini/scripts/pretrain.py
-                python: CPython 3.10.18
-                root: runs/prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7
-                slurm:
-                    cluster_name: kait-gpu-06-parallelcluster
-                    conf: /opt/slurm/etc/slurm.conf
-                    cpu_bind: quiet,mask_cpu:0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000
-                    cpu_bind_list: 0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000
-                    cpu_bind_type: 'mask_cpu:'
-                    cpu_bind_verbose: quiet
-                    cpus_on_node: "128"
-                    gpus: "8"
-                    gpus_on_node: "8"
-                    gtids: "0"
-                    job_cpus_per_node: "128"
-                    job_end_time: "1786233065"
-                    job_gid: "1004"
-                    job_group: byungjun
-                    job_id: "527"
-                    job_name: bash
-                    job_nodelist: compute-st-kait-gpu-2
-                    job_num_nodes: "1"
-                    job_partition: batch2
-                    job_start_time: "1754697065"
-                    job_uid: "1004"
-                    job_user: byungjun
-                    jobid: "527"
-                    launch_node_ipaddr: 10.10.47.245
-                    localid: "0"
-                    nnodes: "1"
-                    nodeid: "0"
-                    nodelist: compute-st-kait-gpu-2
-                    nprocs: "1"
-                    ntasks: "1"
-                    prio_process: "0"
-                    procid: "0"
-                    pty_port: "36537"
-                    pty_win_col: "362"
-                    pty_win_row: "84"
-                    srun_comm_host: 10.10.47.245
-                    srun_comm_port: "45601"
-                    step_gpus: 0,1,2,3,4,5,6,7
-                    step_id: "0"
-                    step_launcher_port: "45601"
-                    step_nodelist: compute-st-kait-gpu-2
-                    step_num_nodes: "1"
-                    step_num_tasks: "1"
-                    step_tasks_per_node: "1"
-                    stepid: "0"
-                    submit_dir: /fsx/byungjun/openvla-mini
-                    submit_host: ip-10-10-47-245
-                    task_pid: "299864"
-                    tasks_per_node: "1"
-                    topology_addr: compute-st-kait-gpu-2
-                    topology_addr_pattern: node
-                    umask: "0002"
-                startedAt: "2025-08-09T00:28:17.772013Z"
-                writerId: vllu22bcqlllmyuwxuzw6uvs6d6got8z
-        m: []
-        python_version: 3.10.18
-        t:
-            "1":
-                - 1
-                - 2
-                - 3
-                - 11
-                - 41
-                - 49
-                - 63
-                - 71
-            "2":
-                - 1
-                - 2
-                - 3
-                - 11
-                - 41
-                - 49
-                - 63
-                - 71
-            "3":
-                - 13
-                - 16
-                - 61
-            "4": 3.10.18
-            "5": 0.21.1
-            "6": 4.40.1
-            "12": 0.21.1
-            "13": linux-x86_64
-dataset:
-    value:
-        align_stage_components:
-            - download/llava-laion-cc-sbu-558k/chat.json
-            - download/llava-laion-cc-sbu-558k
-        dataset_id: llava-v15
-        dataset_root_dir: data2
-        finetune_stage_components:
-            - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
-            - download/llava-v1.5-instruct
-        type: llava-v15
-hf_token:
-    value: .hf_token
-model:
-    value:
-        align_epochs: 1
-        align_global_batch_size: 96
-        align_learning_rate: 0.001
-        align_lr_scheduler_type: linear-warmup+cosine-decay
-        align_max_grad_norm: 1
-        align_max_steps: null
-        align_per_device_batch_size: 16
-        align_save_every_n_steps: 10000
-        align_train_strategy: fsdp-shard-grad-op
-        align_warmup_ratio: 0.03
-        align_weight_decay: 0
-        arch_specifier: no-align+fused-gelu-mlp
-        enable_gradient_checkpointing: true
-        enable_mixed_precision_training: true
-        finetune_epochs: 2
-        finetune_global_batch_size: 128
-        finetune_learning_rate: 2e-05
-        finetune_lr_scheduler_type: linear-warmup+cosine-decay
-        finetune_max_grad_norm: 1
-        finetune_max_steps: null
-        finetune_per_device_batch_size: 8
-        finetune_save_every_n_steps: 10000
-        finetune_train_strategy: fsdp-full-shard
-        finetune_warmup_ratio: 0.03
-        finetune_weight_decay: 0.1
-        image_resize_strategy: resize-naive
-        image_sequence_len: 1
-        llm_backbone_id: qwen25-7b-extra
-        llm_max_length: 32768
-        model_id: prism-qwen25-extra-dinosiglip-224px+7b
-        reduce_in_full_precision: false
-        type: prism-qwen25-extra-dinosiglip-224px+7b
-        vision_backbone_id: dinosiglip-vit-so-224px
-pretrained_checkpoint:
-    value: null
-run_id:
-    value: prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7
-run_root_dir:
-    value: runs
-seed:
-    value: 7
-stage:
-    value: finetune
-trackers:
-    value:
-        - jsonl
-        - wandb
-wandb_entity:
-    value: null
-wandb_project:
-    value: prismatic

wandb/run-20250809_002817-g4nrjez0/files/output.log DELETED Viewed

@@ -1,15 +0,0 @@
-[2;36m08/09 [00:28:19][0m[2;36m [0m[34mINFO    [0m | >> [1m[[0m*[1m][0m Starting Training Loop                                                                                                                                                                                                                                                                                                  ]8;id=176211;file:///fsx/byungjun/openvla-mini/scripts/pretrain.py\[2mpretrain.py[0m]8;;\[2m:[0m]8;id=640595;file:///fsx/byungjun/openvla-mini/scripts/pretrain.py#231\[2m231[0m]8;;\
-Traceback (most recent call last):
-  File "/fsx/byungjun/openvla-mini/scripts/pretrain.py", line 245, in <module>
-    pretrain()
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
-    response = fn(cfg, *args, **kwargs)
-  File "/fsx/byungjun/openvla-mini/scripts/pretrain.py", line 232, in pretrain
-    train_strategy.run_training(train_dataset, collator, metrics, stage=cfg.stage, seed=cfg.seed)
-  File "/fsx/byungjun/openvla-mini/prismatic/training/strategies/base_strategy.py", line 215, in run_training
-    normalized_loss.backward()
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/_tensor.py", line 522, in backward
-    torch.autograd.backward(
-  File "/fsx/byungjun/miniconda3/envs/minivla/lib/python3.10/site-packages/torch/autograd/__init__.py", line 266, in backward
-    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
-KeyboardInterrupt

wandb/run-20250809_002817-g4nrjez0/files/requirements.txt DELETED Viewed

@@ -1,154 +0,0 @@
-nvidia-nvtx-cu12==12.1.105
-kiwisolver==1.4.8
-contourpy==1.3.2
-nvidia-cudnn-cu12==8.9.2.26
-tokenizers==0.19.1
-nvidia-cuda-runtime-cu12==12.1.105
-triton==2.2.0
-hf-xet==1.1.7
-mkl-service==2.4.0
-mkl_random==1.2.8
-pycparser==2.21
-ml-dtypes==0.2.0
-tensorflow==2.15.0
-nvidia-cufft-cu12==11.0.2.54
-pyasn1_modules==0.4.2
-numpy==1.26.4
-numpy==2.0.1
-mypy_extensions==1.1.0
-mkl_fft==1.3.11
-mdurl==0.1.2
-flash-attn==2.5.5
-six==1.17.0
-zipp==3.23.0
-dlimp==0.0.1
-json-numpy==2.1.1
-PySocks==1.7.1
-cffi==1.17.1
-Werkzeug==3.1.3
-rsa==4.9.1
-packaging==25.0
-draccus==0.8.0
-typing-inspection==0.4.1
-Markdown==3.8.2
-wandb==0.21.1
-trimesh==4.7.1
-Pygments==2.19.2
-pillow==11.3.0
-libclang==18.1.1
-typing-inspect==0.9.0
-attrs==25.3.0
-scipy==1.15.3
-scipy==1.11.2
-wrapt==1.14.1
-safetensors==0.6.2
-nvidia-curand-cu12==10.3.2.106
-etils==1.13.0
-OpenEXR==3.3.5
-smmap==5.0.2
-sentencepiece==0.1.99
-pyparsing==3.2.3
-astunparse==1.6.3
-opt_einsum==3.4.0
-tensorflow-graphics==2021.12.3
-fsspec==2025.7.0
-sympy==1.13.3
-timm==0.9.10
-pydantic==2.11.7
-tensorboard==2.15.2
-brotlicffi==1.0.9.2
-torch==2.2.0
-flatbuffers==25.2.10
-filelock==3.17.0
-click==8.2.1
-nvidia-cuda-cupti-cu12==12.1.105
-ninja==1.11.1.4
-typeguard==2.13.3
-nvidia-nccl-cu12==2.19.3
-openvla==0.0.3
-MarkupSafe==3.0.2
-rich==14.1.0
-nvidia-nvjitlink-cu12==12.9.86
-tensorflow-datasets==4.9.3
-tensorflow-io-gcs-filesystem==0.37.1
-networkx==3.4.2
-huggingface-hub==0.34.4
-absl-py==2.3.1
-nvidia-cublas-cu12==12.1.3.1
-torchaudio==2.2.0
-gmpy2==2.2.1
-array_record==0.7.2
-tensorflow-addons==0.23.0
-oauthlib==3.3.1
-PyYAML==6.0.2
-regex==2025.7.34
-nvidia-cuda-nvrtc-cu12==12.1.105
-setuptools==78.1.1
-toml==0.10.2
-google-auth==2.40.3
-certifi==2025.8.3
-keras==2.15.0
-torchvision==0.17.0
-grpcio==1.74.0
-fonttools==4.59.0
-transformers==4.40.1
-annotated-types==0.7.0
-charset-normalizer==3.3.2
-promise==2.3
-mergedeep==1.3.4
-gast==0.6.0
-cachetools==5.5.2
-termcolor==3.1.0
-pyyaml-include==1.4.1
-importlib_resources==6.5.2
-nvidia-cusolver-cu12==11.4.5.107
-h5py==3.14.0
-python-dateutil==2.9.0.post0
-peft==0.11.1
-urllib3==2.5.0
-einops==0.8.1
-tensorflow-estimator==2.15.0
-requests==2.32.4
-psutil==7.0.0
-requests-oauthlib==2.0.0
-pip==25.1
-markdown-it-py==3.0.0
-nvidia-cusparse-cu12==12.1.0.106
-idna==3.7
-tqdm==4.67.1
-dm-tree==0.1.9
-gitdb==4.0.12
-typing_extensions==4.12.2
-matplotlib==3.10.5
-accelerate==1.10.0
-tensorflow-metadata==1.17.2
-sentry-sdk==2.34.1
-jsonlines==4.0.0
-protobuf==4.21.12
-pyasn1==0.6.1
-google-pasta==0.2.0
-mpmath==1.3.0
-Jinja2==3.1.6
-tensorboard-data-server==0.7.2
-pydantic_core==2.33.2
-google-auth-oauthlib==1.2.2
-cycler==0.12.1
-platformdirs==4.3.8
-GitPython==3.1.45
-wheel==0.45.1
-backports.tarfile==1.2.0
-jaraco.collections==5.1.0
-autocommand==2.2.2
-typeguard==4.3.0
-tomli==2.0.1
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-wheel==0.45.1
-more-itertools==10.3.0
-inflect==7.3.1
-jaraco.context==5.3.0
-typing_extensions==4.12.2
-jaraco.functools==4.0.1
-packaging==24.2
-zipp==3.19.2
-jaraco.text==3.12.1

wandb/run-20250809_002817-g4nrjez0/files/wandb-metadata.json DELETED Viewed

@@ -1,149 +0,0 @@
-{
-  "os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.35",
-  "python": "CPython 3.10.18",
-  "startedAt": "2025-08-09T00:28:17.772013Z",
-  "args": [
-    "--model.type",
-    "prism-qwen25-extra-dinosiglip-224px+7b",
-    "--model.finetune_per_device_batch_size",
-    "8"
-  ],
-  "program": "/fsx/byungjun/openvla-mini/scripts/pretrain.py",
-  "codePath": "scripts/pretrain.py",
-  "codePathLocal": "scripts/pretrain.py",
-  "git": {
-    "remote": "https://github.com/happyhappy-jun/openvla-mini",
-    "commit": "1441372f4af4f91d0e99c9a104d10536d8ad566d"
-  },
-  "email": "bjyoon513@gmail.com",
-  "root": "runs/prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7",
-  "host": "compute-st-kait-gpu-2",
-  "executable": "/fsx/byungjun/miniconda3/envs/minivla/bin/python3.10",
-  "cpu_count": 96,
-  "cpu_count_logical": 192,
-  "gpu": "NVIDIA H200",
-  "gpu_count": 8,
-  "disk": {
-    "/": {
-      "total": "520120602624",
-      "used": "64347758592"
-    }
-  },
-  "memory": {
-    "total": "2147425312768"
-  },
-  "gpu_nvidia": [
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-95044091-c6a6-4e9d-26a3-0249feeaf796"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-e54a8b43-5dd9-a2f8-8a71-b254a12248ec"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-daed9c7c-6f35-ec0c-abd5-49e1f7d48645"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-acf2a7ee-d8a1-bb8c-be49-c7a07c0f07da"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-0245a021-19ca-991a-61b0-94cbc116d182"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-4213a83d-27d3-97d3-0cec-f9700637d48c"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-8be9f5c6-a214-8b33-0ac2-217892edfa6f"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-5c5fce07-faf7-1345-d5ea-4c13e75769e7"
-    }
-  ],
-  "cudaVersion": "12.8",
-  "slurm": {
-    "cluster_name": "kait-gpu-06-parallelcluster",
-    "conf": "/opt/slurm/etc/slurm.conf",
-    "cpu_bind": "quiet,mask_cpu:0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000",
-    "cpu_bind_list": "0xFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF00000000",
-    "cpu_bind_type": "mask_cpu:",
-    "cpu_bind_verbose": "quiet",
-    "cpus_on_node": "128",
-    "gpus": "8",
-    "gpus_on_node": "8",
-    "gtids": "0",
-    "job_cpus_per_node": "128",
-    "job_end_time": "1786233065",
-    "job_gid": "1004",
-    "job_group": "byungjun",
-    "job_id": "527",
-    "job_name": "bash",
-    "job_nodelist": "compute-st-kait-gpu-2",
-    "job_num_nodes": "1",
-    "job_partition": "batch2",
-    "job_start_time": "1754697065",
-    "job_uid": "1004",
-    "job_user": "byungjun",
-    "jobid": "527",
-    "launch_node_ipaddr": "10.10.47.245",
-    "localid": "0",
-    "nnodes": "1",
-    "nodeid": "0",
-    "nodelist": "compute-st-kait-gpu-2",
-    "nprocs": "1",
-    "ntasks": "1",
-    "prio_process": "0",
-    "procid": "0",
-    "pty_port": "36537",
-    "pty_win_col": "362",
-    "pty_win_row": "84",
-    "srun_comm_host": "10.10.47.245",
-    "srun_comm_port": "45601",
-    "step_gpus": "0,1,2,3,4,5,6,7",
-    "step_id": "0",
-    "step_launcher_port": "45601",
-    "step_nodelist": "compute-st-kait-gpu-2",
-    "step_num_nodes": "1",
-    "step_num_tasks": "1",
-    "step_tasks_per_node": "1",
-    "stepid": "0",
-    "submit_dir": "/fsx/byungjun/openvla-mini",
-    "submit_host": "ip-10-10-47-245",
-    "task_pid": "299864",
-    "tasks_per_node": "1",
-    "topology_addr": "compute-st-kait-gpu-2",
-    "topology_addr_pattern": "node",
-    "umask": "0002"
-  },
-  "writerId": "vllu22bcqlllmyuwxuzw6uvs6d6got8z"
-}

wandb/run-20250809_002817-g4nrjez0/files/wandb-summary.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"Finetune/Step Time":1.899375669658184,"_timestamp":1.7546996588573313e+09,"Finetune/Step":179,"_runtime":361.177352297,"_step":179,"Finetune/Loss":1.1296229362487793,"Finetune/Loss (Raw)":0.8316053152084351,"_wandb":{"runtime":361},"Finetune/Learning Rate":1.1511254019292605e-05}

wandb/run-20250809_002817-g4nrjez0/logs/debug-core.log DELETED Viewed

@@ -1,14 +0,0 @@
-{"time":"2025-08-09T00:28:18.119666003Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpyqnvs5s6/port-319874.txt","pid":319874,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
-{"time":"2025-08-09T00:28:18.120962189Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":319874}
-{"time":"2025-08-09T00:28:18.120959456Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-319874-326372-1479802229/socket","Net":"unix"}}
-{"time":"2025-08-09T00:28:18.180753532Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
-{"time":"2025-08-09T00:28:18.210476243Z","level":"INFO","msg":"handleInformInit: received","streamId":"g4nrjez0","id":"1(@)"}
-{"time":"2025-08-09T00:28:18.565791506Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"g4nrjez0","id":"1(@)"}
-{"time":"2025-08-09T00:34:20.084549801Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
-{"time":"2025-08-09T00:34:20.086127609Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
-{"time":"2025-08-09T00:34:20.08614172Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-08-09T00:34:20.086227672Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
-{"time":"2025-08-09T00:34:20.08628093Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-319874-326372-1479802229/socket","Net":"unix"}}
-{"time":"2025-08-09T00:34:20.962961869Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
-{"time":"2025-08-09T00:34:20.962994307Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
-{"time":"2025-08-09T00:34:20.96300497Z","level":"INFO","msg":"server is closed"}

wandb/run-20250809_002817-g4nrjez0/logs/debug-internal.log DELETED Viewed

@@ -1,11 +0,0 @@
-{"time":"2025-08-09T00:28:18.213209216Z","level":"INFO","msg":"stream: starting","core version":"0.21.1"}
-{"time":"2025-08-09T00:28:18.565722905Z","level":"INFO","msg":"stream: created new stream","id":"g4nrjez0"}
-{"time":"2025-08-09T00:28:18.565783517Z","level":"INFO","msg":"stream: started","id":"g4nrjez0"}
-{"time":"2025-08-09T00:28:18.565804516Z","level":"INFO","msg":"writer: started","stream_id":"g4nrjez0"}
-{"time":"2025-08-09T00:28:18.56583991Z","level":"INFO","msg":"sender: started","stream_id":"g4nrjez0"}
-{"time":"2025-08-09T00:28:18.565819921Z","level":"INFO","msg":"handler: started","stream_id":"g4nrjez0"}
-{"time":"2025-08-09T00:34:20.086137698Z","level":"INFO","msg":"stream: closing","id":"g4nrjez0"}
-{"time":"2025-08-09T00:34:20.688455806Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-08-09T00:34:20.949879505Z","level":"INFO","msg":"handler: closed","stream_id":"g4nrjez0"}
-{"time":"2025-08-09T00:34:20.950937516Z","level":"INFO","msg":"sender: closed","stream_id":"g4nrjez0"}
-{"time":"2025-08-09T00:34:20.950965207Z","level":"INFO","msg":"stream: closed","id":"g4nrjez0"}

wandb/run-20250809_002817-g4nrjez0/logs/debug.log DELETED Viewed

@@ -1,22 +0,0 @@
-2025-08-09 00:28:17,895 INFO    MainThread:319874 [wandb_setup.py:_flush():80] Current SDK version is 0.21.1
-2025-08-09 00:28:17,897 INFO    MainThread:319874 [wandb_setup.py:_flush():80] Configure stats pid to 319874
-2025-08-09 00:28:17,897 INFO    MainThread:319874 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/.config/wandb/settings
-2025-08-09 00:28:17,897 INFO    MainThread:319874 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/openvla-mini/wandb/settings
-2025-08-09 00:28:17,897 INFO    MainThread:319874 [wandb_setup.py:_flush():80] Loading settings from environment variables
-2025-08-09 00:28:17,897 INFO    MainThread:319874 [wandb_init.py:setup_run_log_directory():703] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7/wandb/run-20250809_002817-g4nrjez0/logs/debug.log
-2025-08-09 00:28:17,898 INFO    MainThread:319874 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7/wandb/run-20250809_002817-g4nrjez0/logs/debug-internal.log
-2025-08-09 00:28:17,898 INFO    MainThread:319874 [wandb_init.py:init():830] calling init triggers
-2025-08-09 00:28:17,898 INFO    MainThread:319874 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
-config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+7b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+7b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-7b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'image_sequence_len': 1, 'align_epochs': 1, 'align_max_steps': None, 'align_save_every_n_steps': 10000, 'align_global_batch_size': 96, 'align_per_device_batch_size': 16, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_save_every_n_steps': 10000, 'finetune_global_batch_size': 128, 'finetune_per_device_batch_size': 8, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data2'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'prismatic', 'wandb_entity': None, '_wandb': {}}
-2025-08-09 00:28:17,898 INFO    MainThread:319874 [wandb_init.py:init():871] starting backend
-2025-08-09 00:28:18,180 INFO    MainThread:319874 [wandb_init.py:init():874] sending inform_init request
-2025-08-09 00:28:18,209 INFO    MainThread:319874 [wandb_init.py:init():882] backend started and connected
-2025-08-09 00:28:18,211 INFO    MainThread:319874 [wandb_init.py:init():953] updated telemetry
-2025-08-09 00:28:18,280 INFO    MainThread:319874 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
-2025-08-09 00:28:18,900 INFO    MainThread:319874 [wandb_init.py:init():1029] starting run threads in backend
-2025-08-09 00:28:19,735 INFO    MainThread:319874 [wandb_run.py:_console_start():2494] atexit reg
-2025-08-09 00:28:19,735 INFO    MainThread:319874 [wandb_run.py:_redirect():2342] redirect: wrap_raw
-2025-08-09 00:28:19,735 INFO    MainThread:319874 [wandb_run.py:_redirect():2411] Wrapping output streams.
-2025-08-09 00:28:19,735 INFO    MainThread:319874 [wandb_run.py:_redirect():2434] Redirects installed.
-2025-08-09 00:28:19,755 INFO    MainThread:319874 [wandb_init.py:init():1075] run started, returning control to user process
-2025-08-09 00:34:20,082 INFO    MsgRouterThr:319874 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.

wandb/run-20250809_002817-g4nrjez0/run-g4nrjez0.wandb DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ba6a8ec227343c714732e0f716eccc6ab27ad1d25df82332a01b36eded991434
-size 359681

wandb/run-20250809_004353-jppa1ary/files/config.yaml DELETED Viewed

@@ -1,205 +0,0 @@
-_wandb:
-    value:
-        cli_version: 0.21.1
-        e:
-            kmjhvvefrqpn82qctd036hqj5qg0u497:
-                args:
-                    - --model.type
-                    - prism-qwen25-extra-dinosiglip-224px+7b
-                    - --model.finetune_per_device_batch_size
-                    - "4"
-                codePath: scripts/pretrain.py
-                codePathLocal: scripts/pretrain.py
-                cpu_count: 96
-                cpu_count_logical: 192
-                cudaVersion: "12.8"
-                disk:
-                    /:
-                        total: "520120602624"
-                        used: "64347889664"
-                email: bjyoon513@gmail.com
-                executable: /fsx/byungjun/miniconda3/envs/minivla/bin/python3.10
-                git:
-                    commit: a72e9ce42035282eb6c950204e50ef3c4fbb363d
-                    remote: https://github.com/happyhappy-jun/openvla-mini
-                gpu: NVIDIA H200
-                gpu_count: 8
-                gpu_nvidia:
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-95044091-c6a6-4e9d-26a3-0249feeaf796
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-e54a8b43-5dd9-a2f8-8a71-b254a12248ec
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-daed9c7c-6f35-ec0c-abd5-49e1f7d48645
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-acf2a7ee-d8a1-bb8c-be49-c7a07c0f07da
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-0245a021-19ca-991a-61b0-94cbc116d182
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-4213a83d-27d3-97d3-0cec-f9700637d48c
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-8be9f5c6-a214-8b33-0ac2-217892edfa6f
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-5c5fce07-faf7-1345-d5ea-4c13e75769e7
-                host: compute-st-kait-gpu-2
-                memory:
-                    total: "2147425312768"
-                os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.35
-                program: /fsx/byungjun/openvla-mini/scripts/pretrain.py
-                python: CPython 3.10.18
-                root: runs/prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7
-                slurm:
-                    cluster_name: kait-gpu-06-parallelcluster
-                    conf: /opt/slurm/etc/slurm.conf
-                    cpus_on_node: "128"
-                    gpus: "8"
-                    gpus_on_node: "8"
-                    gtids: "0"
-                    job_cpus_per_node: "128"
-                    job_end_time: "1754959041"
-                    job_gid: "1004"
-                    job_gpus: 0,1,2,3,4,5,6,7
-                    job_id: "530"
-                    job_name: qwen_3b_vlm_finetune
-                    job_nodelist: compute-st-kait-gpu-2
-                    job_num_nodes: "1"
-                    job_partition: batch2
-                    job_start_time: "1754699841"
-                    job_uid: "1004"
-                    job_user: byungjun
-                    jobid: "530"
-                    localid: "0"
-                    nnodes: "1"
-                    nodeid: "0"
-                    nodelist: compute-st-kait-gpu-2
-                    prio_process: "0"
-                    procid: "0"
-                    submit_dir: /fsx/byungjun/openvla-mini
-                    submit_host: ip-10-10-47-245
-                    task_pid: "332240"
-                    tasks_per_node: "128"
-                    topology_addr: compute-st-kait-gpu-2
-                    topology_addr_pattern: node
-                startedAt: "2025-08-09T00:43:53.494358Z"
-                writerId: kmjhvvefrqpn82qctd036hqj5qg0u497
-        m: []
-        python_version: 3.10.18
-        t:
-            "1":
-                - 1
-                - 2
-                - 3
-                - 11
-                - 41
-                - 49
-                - 63
-                - 71
-            "2":
-                - 1
-                - 2
-                - 3
-                - 11
-                - 41
-                - 49
-                - 63
-                - 71
-            "3":
-                - 2
-                - 13
-                - 16
-                - 61
-            "4": 3.10.18
-            "5": 0.21.1
-            "6": 4.40.1
-            "12": 0.21.1
-            "13": linux-x86_64
-dataset:
-    value:
-        align_stage_components:
-            - download/llava-laion-cc-sbu-558k/chat.json
-            - download/llava-laion-cc-sbu-558k
-        dataset_id: llava-v15
-        dataset_root_dir: data2
-        finetune_stage_components:
-            - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
-            - download/llava-v1.5-instruct
-        type: llava-v15
-hf_token:
-    value: .hf_token
-model:
-    value:
-        align_epochs: 1
-        align_global_batch_size: 96
-        align_learning_rate: 0.001
-        align_lr_scheduler_type: linear-warmup+cosine-decay
-        align_max_grad_norm: 1
-        align_max_steps: null
-        align_per_device_batch_size: 16
-        align_save_every_n_steps: 10000
-        align_train_strategy: fsdp-shard-grad-op
-        align_warmup_ratio: 0.03
-        align_weight_decay: 0
-        arch_specifier: no-align+fused-gelu-mlp
-        enable_gradient_checkpointing: true
-        enable_mixed_precision_training: true
-        finetune_epochs: 2
-        finetune_global_batch_size: 128
-        finetune_learning_rate: 2e-05
-        finetune_lr_scheduler_type: linear-warmup+cosine-decay
-        finetune_max_grad_norm: 1
-        finetune_max_steps: null
-        finetune_per_device_batch_size: 4
-        finetune_save_every_n_steps: 10000
-        finetune_train_strategy: fsdp-full-shard
-        finetune_warmup_ratio: 0.03
-        finetune_weight_decay: 0.1
-        image_resize_strategy: resize-naive
-        image_sequence_len: 1
-        llm_backbone_id: qwen25-7b-extra
-        llm_max_length: 32768
-        model_id: prism-qwen25-extra-dinosiglip-224px+7b
-        reduce_in_full_precision: false
-        type: prism-qwen25-extra-dinosiglip-224px+7b
-        vision_backbone_id: dinosiglip-vit-so-224px
-pretrained_checkpoint:
-    value: null
-run_id:
-    value: prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7
-run_root_dir:
-    value: runs
-seed:
-    value: 7
-stage:
-    value: finetune
-trackers:
-    value:
-        - jsonl
-        - wandb
-wandb_entity:
-    value: null
-wandb_project:
-    value: prismatic

wandb/run-20250809_004353-jppa1ary/files/output.log DELETED Viewed

@@ -1,4 +0,0 @@
-08/09 [00:43:55] INFO     | >> [*] Starting Training Loop        pretrain.py:231
-08/09 [07:37:35] INFO     | >> [*] Done with Training =>>        pretrain.py:235
-                          Finalizing Metrics

wandb/run-20250809_004353-jppa1ary/files/requirements.txt DELETED Viewed

@@ -1,154 +0,0 @@
-nvidia-nvtx-cu12==12.1.105
-kiwisolver==1.4.8
-contourpy==1.3.2
-nvidia-cudnn-cu12==8.9.2.26
-tokenizers==0.19.1
-nvidia-cuda-runtime-cu12==12.1.105
-triton==2.2.0
-hf-xet==1.1.7
-mkl-service==2.4.0
-mkl_random==1.2.8
-pycparser==2.21
-ml-dtypes==0.2.0
-tensorflow==2.15.0
-nvidia-cufft-cu12==11.0.2.54
-pyasn1_modules==0.4.2
-numpy==1.26.4
-numpy==2.0.1
-mypy_extensions==1.1.0
-mkl_fft==1.3.11
-mdurl==0.1.2
-flash-attn==2.5.5
-six==1.17.0
-zipp==3.23.0
-dlimp==0.0.1
-json-numpy==2.1.1
-PySocks==1.7.1
-cffi==1.17.1
-Werkzeug==3.1.3
-rsa==4.9.1
-packaging==25.0
-draccus==0.8.0
-typing-inspection==0.4.1
-Markdown==3.8.2
-wandb==0.21.1
-trimesh==4.7.1
-Pygments==2.19.2
-pillow==11.3.0
-libclang==18.1.1
-typing-inspect==0.9.0
-attrs==25.3.0
-scipy==1.15.3
-scipy==1.11.2
-wrapt==1.14.1
-safetensors==0.6.2
-nvidia-curand-cu12==10.3.2.106
-etils==1.13.0
-OpenEXR==3.3.5
-smmap==5.0.2
-sentencepiece==0.1.99
-pyparsing==3.2.3
-astunparse==1.6.3
-opt_einsum==3.4.0
-tensorflow-graphics==2021.12.3
-fsspec==2025.7.0
-sympy==1.13.3
-timm==0.9.10
-pydantic==2.11.7
-tensorboard==2.15.2
-brotlicffi==1.0.9.2
-torch==2.2.0
-flatbuffers==25.2.10
-filelock==3.17.0
-click==8.2.1
-nvidia-cuda-cupti-cu12==12.1.105
-ninja==1.11.1.4
-typeguard==2.13.3
-nvidia-nccl-cu12==2.19.3
-openvla==0.0.3
-MarkupSafe==3.0.2
-rich==14.1.0
-nvidia-nvjitlink-cu12==12.9.86
-tensorflow-datasets==4.9.3
-tensorflow-io-gcs-filesystem==0.37.1
-networkx==3.4.2
-huggingface-hub==0.34.4
-absl-py==2.3.1
-nvidia-cublas-cu12==12.1.3.1
-torchaudio==2.2.0
-gmpy2==2.2.1
-array_record==0.7.2
-tensorflow-addons==0.23.0
-oauthlib==3.3.1
-PyYAML==6.0.2
-regex==2025.7.34
-nvidia-cuda-nvrtc-cu12==12.1.105
-setuptools==78.1.1
-toml==0.10.2
-google-auth==2.40.3
-certifi==2025.8.3
-keras==2.15.0
-torchvision==0.17.0
-grpcio==1.74.0
-fonttools==4.59.0
-transformers==4.40.1
-annotated-types==0.7.0
-charset-normalizer==3.3.2
-promise==2.3
-mergedeep==1.3.4
-gast==0.6.0
-cachetools==5.5.2
-termcolor==3.1.0
-pyyaml-include==1.4.1
-importlib_resources==6.5.2
-nvidia-cusolver-cu12==11.4.5.107
-h5py==3.14.0
-python-dateutil==2.9.0.post0
-peft==0.11.1
-urllib3==2.5.0
-einops==0.8.1
-tensorflow-estimator==2.15.0
-requests==2.32.4
-psutil==7.0.0
-requests-oauthlib==2.0.0
-pip==25.1
-markdown-it-py==3.0.0
-nvidia-cusparse-cu12==12.1.0.106
-idna==3.7
-tqdm==4.67.1
-dm-tree==0.1.9
-gitdb==4.0.12
-typing_extensions==4.12.2
-matplotlib==3.10.5
-accelerate==1.10.0
-tensorflow-metadata==1.17.2
-sentry-sdk==2.34.1
-jsonlines==4.0.0
-protobuf==4.21.12
-pyasn1==0.6.1
-google-pasta==0.2.0
-mpmath==1.3.0
-Jinja2==3.1.6
-tensorboard-data-server==0.7.2
-pydantic_core==2.33.2
-google-auth-oauthlib==1.2.2
-cycler==0.12.1
-platformdirs==4.3.8
-GitPython==3.1.45
-wheel==0.45.1
-backports.tarfile==1.2.0
-jaraco.collections==5.1.0
-autocommand==2.2.2
-typeguard==4.3.0
-tomli==2.0.1
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-wheel==0.45.1
-more-itertools==10.3.0
-inflect==7.3.1
-jaraco.context==5.3.0
-typing_extensions==4.12.2
-jaraco.functools==4.0.1
-packaging==24.2
-zipp==3.19.2
-jaraco.text==3.12.1

wandb/run-20250809_004353-jppa1ary/files/wandb-metadata.json DELETED Viewed

@@ -1,128 +0,0 @@
-{
-  "os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.35",
-  "python": "CPython 3.10.18",
-  "startedAt": "2025-08-09T00:43:53.494358Z",
-  "args": [
-    "--model.type",
-    "prism-qwen25-extra-dinosiglip-224px+7b",
-    "--model.finetune_per_device_batch_size",
-    "4"
-  ],
-  "program": "/fsx/byungjun/openvla-mini/scripts/pretrain.py",
-  "codePath": "scripts/pretrain.py",
-  "codePathLocal": "scripts/pretrain.py",
-  "git": {
-    "remote": "https://github.com/happyhappy-jun/openvla-mini",
-    "commit": "a72e9ce42035282eb6c950204e50ef3c4fbb363d"
-  },
-  "email": "bjyoon513@gmail.com",
-  "root": "runs/prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7",
-  "host": "compute-st-kait-gpu-2",
-  "executable": "/fsx/byungjun/miniconda3/envs/minivla/bin/python3.10",
-  "cpu_count": 96,
-  "cpu_count_logical": 192,
-  "gpu": "NVIDIA H200",
-  "gpu_count": 8,
-  "disk": {
-    "/": {
-      "total": "520120602624",
-      "used": "64347889664"
-    }
-  },
-  "memory": {
-    "total": "2147425312768"
-  },
-  "gpu_nvidia": [
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-95044091-c6a6-4e9d-26a3-0249feeaf796"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-e54a8b43-5dd9-a2f8-8a71-b254a12248ec"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-daed9c7c-6f35-ec0c-abd5-49e1f7d48645"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-acf2a7ee-d8a1-bb8c-be49-c7a07c0f07da"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-0245a021-19ca-991a-61b0-94cbc116d182"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-4213a83d-27d3-97d3-0cec-f9700637d48c"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-8be9f5c6-a214-8b33-0ac2-217892edfa6f"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-5c5fce07-faf7-1345-d5ea-4c13e75769e7"
-    }
-  ],
-  "cudaVersion": "12.8",
-  "slurm": {
-    "cluster_name": "kait-gpu-06-parallelcluster",
-    "conf": "/opt/slurm/etc/slurm.conf",
-    "cpus_on_node": "128",
-    "gpus": "8",
-    "gpus_on_node": "8",
-    "gtids": "0",
-    "job_cpus_per_node": "128",
-    "job_end_time": "1754959041",
-    "job_gid": "1004",
-    "job_gpus": "0,1,2,3,4,5,6,7",
-    "job_id": "530",
-    "job_name": "qwen_3b_vlm_finetune",
-    "job_nodelist": "compute-st-kait-gpu-2",
-    "job_num_nodes": "1",
-    "job_partition": "batch2",
-    "job_start_time": "1754699841",
-    "job_uid": "1004",
-    "job_user": "byungjun",
-    "jobid": "530",
-    "localid": "0",
-    "nnodes": "1",
-    "nodeid": "0",
-    "nodelist": "compute-st-kait-gpu-2",
-    "prio_process": "0",
-    "procid": "0",
-    "submit_dir": "/fsx/byungjun/openvla-mini",
-    "submit_host": "ip-10-10-47-245",
-    "task_pid": "332240",
-    "tasks_per_node": "128",
-    "topology_addr": "compute-st-kait-gpu-2",
-    "topology_addr_pattern": "node"
-  },
-  "writerId": "kmjhvvefrqpn82qctd036hqj5qg0u497"
-}

wandb/run-20250809_004353-jppa1ary/files/wandb-summary.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"_wandb":{"runtime":24820},"Finetune/Loss":0.4964504539966583,"Finetune/Step Time":2.4623548705130816,"Finetune/Loss (Raw)":0.4348399043083191,"Finetune/Learning Rate":0,"_runtime":24820.710890925,"_step":10396,"_timestamp":1.7547249823812644e+09,"Finetune/Step":10396}

wandb/run-20250809_004353-jppa1ary/logs/debug-core.log DELETED Viewed

@@ -1,16 +0,0 @@
-{"time":"2025-08-09T00:43:53.81705927Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpci34j8hb/port-332325.txt","pid":332325,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
-{"time":"2025-08-09T00:43:53.818633166Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":332325}
-{"time":"2025-08-09T00:43:53.818616932Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-332325-335017-2636856031/socket","Net":"unix"}}
-{"time":"2025-08-09T00:43:53.878374806Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
-{"time":"2025-08-09T00:43:53.89799455Z","level":"INFO","msg":"handleInformInit: received","streamId":"jppa1ary","id":"1(@)"}
-{"time":"2025-08-09T00:43:54.352237644Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"jppa1ary","id":"1(@)"}
-{"time":"2025-08-09T07:37:36.50501082Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"jppa1ary","id":"1(@)"}
-{"time":"2025-08-09T07:37:36.600528305Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"jppa1ary","id":"1(@)"}
-{"time":"2025-08-09T07:41:09.482254737Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
-{"time":"2025-08-09T07:41:09.482358562Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
-{"time":"2025-08-09T07:41:09.482371713Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-08-09T07:41:09.482390424Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
-{"time":"2025-08-09T07:41:09.482459713Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-332325-335017-2636856031/socket","Net":"unix"}}
-{"time":"2025-08-09T07:41:09.482507448Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
-{"time":"2025-08-09T07:41:09.482515631Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
-{"time":"2025-08-09T07:41:09.482523879Z","level":"INFO","msg":"server is closed"}

wandb/run-20250809_004353-jppa1ary/logs/debug-internal.log DELETED Viewed

@@ -1,12 +0,0 @@
-{"time":"2025-08-09T00:43:53.899512181Z","level":"INFO","msg":"stream: starting","core version":"0.21.1"}
-{"time":"2025-08-09T00:43:54.352151834Z","level":"INFO","msg":"stream: created new stream","id":"jppa1ary"}
-{"time":"2025-08-09T00:43:54.352229223Z","level":"INFO","msg":"stream: started","id":"jppa1ary"}
-{"time":"2025-08-09T00:43:54.352242749Z","level":"INFO","msg":"handler: started","stream_id":"jppa1ary"}
-{"time":"2025-08-09T00:43:54.352276271Z","level":"INFO","msg":"writer: started","stream_id":"jppa1ary"}
-{"time":"2025-08-09T00:43:54.352258529Z","level":"INFO","msg":"sender: started","stream_id":"jppa1ary"}
-{"time":"2025-08-09T07:37:36.250440803Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-08-09T07:37:36.399127669Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading history steps 10395-10395, summary, console lines 1-3","runtime_seconds":0.146154819}],"total_operations":1}}
-{"time":"2025-08-09T07:37:36.508572754Z","level":"INFO","msg":"stream: closing","id":"jppa1ary"}
-{"time":"2025-08-09T07:37:36.508598422Z","level":"INFO","msg":"handler: closed","stream_id":"jppa1ary"}
-{"time":"2025-08-09T07:37:36.509838915Z","level":"INFO","msg":"sender: closed","stream_id":"jppa1ary"}
-{"time":"2025-08-09T07:37:36.509849291Z","level":"INFO","msg":"stream: closed","id":"jppa1ary"}

wandb/run-20250809_004353-jppa1ary/logs/debug.log DELETED Viewed

@@ -1,28 +0,0 @@
-2025-08-09 00:43:53,600 INFO    MainThread:332325 [wandb_setup.py:_flush():80] Current SDK version is 0.21.1
-2025-08-09 00:43:53,601 INFO    MainThread:332325 [wandb_setup.py:_flush():80] Configure stats pid to 332325
-2025-08-09 00:43:53,601 INFO    MainThread:332325 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/.config/wandb/settings
-2025-08-09 00:43:53,601 INFO    MainThread:332325 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/openvla-mini/wandb/settings
-2025-08-09 00:43:53,601 INFO    MainThread:332325 [wandb_setup.py:_flush():80] Loading settings from environment variables
-2025-08-09 00:43:53,601 INFO    MainThread:332325 [wandb_init.py:setup_run_log_directory():703] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7/wandb/run-20250809_004353-jppa1ary/logs/debug.log
-2025-08-09 00:43:53,602 INFO    MainThread:332325 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7/wandb/run-20250809_004353-jppa1ary/logs/debug-internal.log
-2025-08-09 00:43:53,602 INFO    MainThread:332325 [wandb_init.py:init():830] calling init triggers
-2025-08-09 00:43:53,602 INFO    MainThread:332325 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
-config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+7b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+7b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-7b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'image_sequence_len': 1, 'align_epochs': 1, 'align_max_steps': None, 'align_save_every_n_steps': 10000, 'align_global_batch_size': 96, 'align_per_device_batch_size': 16, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_save_every_n_steps': 10000, 'finetune_global_batch_size': 128, 'finetune_per_device_batch_size': 4, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data2'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+7b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'prismatic', 'wandb_entity': None, '_wandb': {}}
-2025-08-09 00:43:53,602 INFO    MainThread:332325 [wandb_init.py:init():871] starting backend
-2025-08-09 00:43:53,878 INFO    MainThread:332325 [wandb_init.py:init():874] sending inform_init request
-2025-08-09 00:43:53,896 INFO    MainThread:332325 [wandb_init.py:init():882] backend started and connected
-2025-08-09 00:43:53,901 INFO    MainThread:332325 [wandb_init.py:init():953] updated telemetry
-2025-08-09 00:43:53,957 INFO    MainThread:332325 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
-2025-08-09 00:43:54,674 INFO    MainThread:332325 [wandb_init.py:init():1029] starting run threads in backend
-2025-08-09 00:43:55,488 INFO    MainThread:332325 [wandb_run.py:_console_start():2494] atexit reg
-2025-08-09 00:43:55,488 INFO    MainThread:332325 [wandb_run.py:_redirect():2342] redirect: wrap_raw
-2025-08-09 00:43:55,488 INFO    MainThread:332325 [wandb_run.py:_redirect():2411] Wrapping output streams.
-2025-08-09 00:43:55,488 INFO    MainThread:332325 [wandb_run.py:_redirect():2434] Redirects installed.
-2025-08-09 00:43:55,504 INFO    MainThread:332325 [wandb_init.py:init():1075] run started, returning control to user process
-2025-08-09 07:37:35,382 INFO    MainThread:332325 [wandb_run.py:_finish():2260] finishing run happyhappy/prismatic/jppa1ary
-2025-08-09 07:37:35,385 INFO    MainThread:332325 [wandb_run.py:_atexit_cleanup():2459] got exitcode: 0
-2025-08-09 07:37:35,387 INFO    MainThread:332325 [wandb_run.py:_restore():2441] restore
-2025-08-09 07:37:35,387 INFO    MainThread:332325 [wandb_run.py:_restore():2447] restore done
-2025-08-09 07:37:36,501 INFO    MainThread:332325 [wandb_run.py:_footer_history_summary_info():3895] rendering history
-2025-08-09 07:37:36,502 INFO    MainThread:332325 [wandb_run.py:_footer_history_summary_info():3927] rendering summary
-2025-08-09 07:37:36,502 INFO    MainThread:332325 [wandb_run.py:_footer_sync_info():3856] logging synced files

wandb/run-20250809_004353-jppa1ary/run-jppa1ary.wandb DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:98046782432a70779157e88c06e57847061e088c070e5e4db878dc0527989278
-size 16589936

wandb/run-20250809_074602-gpyuprau/files/config.yaml DELETED Viewed

@@ -1,205 +0,0 @@
-_wandb:
-    value:
-        cli_version: 0.21.1
-        e:
-            3taj0yyhnh9dhglq0763fsp13ob8f2ee:
-                args:
-                    - --model.type
-                    - prism-qwen25-extra-dinosiglip-224px+3b
-                    - --model.finetune_per_device_batch_size
-                    - "4"
-                codePath: scripts/pretrain.py
-                codePathLocal: scripts/pretrain.py
-                cpu_count: 96
-                cpu_count_logical: 192
-                cudaVersion: "12.8"
-                disk:
-                    /:
-                        total: "520120602624"
-                        used: "64348778496"
-                email: bjyoon513@gmail.com
-                executable: /fsx/byungjun/miniconda3/envs/minivla/bin/python3.10
-                git:
-                    commit: a72e9ce42035282eb6c950204e50ef3c4fbb363d
-                    remote: https://github.com/happyhappy-jun/openvla-mini
-                gpu: NVIDIA H200
-                gpu_count: 8
-                gpu_nvidia:
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-95044091-c6a6-4e9d-26a3-0249feeaf796
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-e54a8b43-5dd9-a2f8-8a71-b254a12248ec
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-daed9c7c-6f35-ec0c-abd5-49e1f7d48645
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-acf2a7ee-d8a1-bb8c-be49-c7a07c0f07da
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-0245a021-19ca-991a-61b0-94cbc116d182
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-4213a83d-27d3-97d3-0cec-f9700637d48c
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-8be9f5c6-a214-8b33-0ac2-217892edfa6f
-                    - architecture: Hopper
-                      cudaCores: 16896
-                      memoryTotal: "150754820096"
-                      name: NVIDIA H200
-                      uuid: GPU-5c5fce07-faf7-1345-d5ea-4c13e75769e7
-                host: compute-st-kait-gpu-2
-                memory:
-                    total: "2147425312768"
-                os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.35
-                program: /fsx/byungjun/openvla-mini/scripts/pretrain.py
-                python: CPython 3.10.18
-                root: runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7
-                slurm:
-                    cluster_name: kait-gpu-06-parallelcluster
-                    conf: /opt/slurm/etc/slurm.conf
-                    cpus_on_node: "128"
-                    gpus: "8"
-                    gpus_on_node: "8"
-                    gtids: "0"
-                    job_cpus_per_node: "128"
-                    job_end_time: "1754984478"
-                    job_gid: "1004"
-                    job_gpus: 0,1,2,3,4,5,6,7
-                    job_id: "531"
-                    job_name: qwen_3b_vlm_finetune
-                    job_nodelist: compute-st-kait-gpu-2
-                    job_num_nodes: "1"
-                    job_partition: batch2
-                    job_start_time: "1754725278"
-                    job_uid: "1004"
-                    job_user: byungjun
-                    jobid: "531"
-                    localid: "0"
-                    nnodes: "1"
-                    nodeid: "0"
-                    nodelist: compute-st-kait-gpu-2
-                    prio_process: "0"
-                    procid: "0"
-                    submit_dir: /fsx/byungjun/openvla-mini
-                    submit_host: ip-10-10-47-245
-                    task_pid: "511629"
-                    tasks_per_node: "128"
-                    topology_addr: compute-st-kait-gpu-2
-                    topology_addr_pattern: node
-                startedAt: "2025-08-09T07:46:02.753034Z"
-                writerId: 3taj0yyhnh9dhglq0763fsp13ob8f2ee
-        m: []
-        python_version: 3.10.18
-        t:
-            "1":
-                - 1
-                - 2
-                - 3
-                - 11
-                - 41
-                - 49
-                - 63
-                - 71
-            "2":
-                - 1
-                - 2
-                - 3
-                - 11
-                - 41
-                - 49
-                - 63
-                - 71
-            "3":
-                - 2
-                - 13
-                - 16
-                - 61
-            "4": 3.10.18
-            "5": 0.21.1
-            "6": 4.40.1
-            "12": 0.21.1
-            "13": linux-x86_64
-dataset:
-    value:
-        align_stage_components:
-            - download/llava-laion-cc-sbu-558k/chat.json
-            - download/llava-laion-cc-sbu-558k
-        dataset_id: llava-v15
-        dataset_root_dir: data2
-        finetune_stage_components:
-            - download/llava-v1.5-instruct/llava_v1_5_mix665k.json
-            - download/llava-v1.5-instruct
-        type: llava-v15
-hf_token:
-    value: .hf_token
-model:
-    value:
-        align_epochs: 1
-        align_global_batch_size: 96
-        align_learning_rate: 0.001
-        align_lr_scheduler_type: linear-warmup+cosine-decay
-        align_max_grad_norm: 1
-        align_max_steps: null
-        align_per_device_batch_size: 16
-        align_save_every_n_steps: 10000
-        align_train_strategy: fsdp-shard-grad-op
-        align_warmup_ratio: 0.03
-        align_weight_decay: 0
-        arch_specifier: no-align+fused-gelu-mlp
-        enable_gradient_checkpointing: true
-        enable_mixed_precision_training: true
-        finetune_epochs: 2
-        finetune_global_batch_size: 128
-        finetune_learning_rate: 2e-05
-        finetune_lr_scheduler_type: linear-warmup+cosine-decay
-        finetune_max_grad_norm: 1
-        finetune_max_steps: null
-        finetune_per_device_batch_size: 4
-        finetune_save_every_n_steps: 10000
-        finetune_train_strategy: fsdp-full-shard
-        finetune_warmup_ratio: 0.03
-        finetune_weight_decay: 0.1
-        image_resize_strategy: resize-naive
-        image_sequence_len: 1
-        llm_backbone_id: qwen25-3b-extra
-        llm_max_length: 32768
-        model_id: prism-qwen25-extra-dinosiglip-224px+3b
-        reduce_in_full_precision: false
-        type: prism-qwen25-extra-dinosiglip-224px+3b
-        vision_backbone_id: dinosiglip-vit-so-224px
-pretrained_checkpoint:
-    value: null
-run_id:
-    value: prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7
-run_root_dir:
-    value: runs
-seed:
-    value: 7
-stage:
-    value: finetune
-trackers:
-    value:
-        - jsonl
-        - wandb
-wandb_entity:
-    value: null
-wandb_project:
-    value: prismatic

wandb/run-20250809_074602-gpyuprau/files/output.log DELETED Viewed

@@ -1,4 +0,0 @@
-08/09 [07:46:04] INFO     | >> [*] Starting Training Loop        pretrain.py:231
-08/09 [14:11:41] INFO     | >> [*] Done with Training =>>        pretrain.py:235
-                          Finalizing Metrics

wandb/run-20250809_074602-gpyuprau/files/requirements.txt DELETED Viewed

@@ -1,154 +0,0 @@
-nvidia-nvtx-cu12==12.1.105
-kiwisolver==1.4.8
-contourpy==1.3.2
-nvidia-cudnn-cu12==8.9.2.26
-tokenizers==0.19.1
-nvidia-cuda-runtime-cu12==12.1.105
-triton==2.2.0
-hf-xet==1.1.7
-mkl-service==2.4.0
-mkl_random==1.2.8
-pycparser==2.21
-ml-dtypes==0.2.0
-tensorflow==2.15.0
-nvidia-cufft-cu12==11.0.2.54
-pyasn1_modules==0.4.2
-numpy==1.26.4
-numpy==2.0.1
-mypy_extensions==1.1.0
-mkl_fft==1.3.11
-mdurl==0.1.2
-flash-attn==2.5.5
-six==1.17.0
-zipp==3.23.0
-dlimp==0.0.1
-json-numpy==2.1.1
-PySocks==1.7.1
-cffi==1.17.1
-Werkzeug==3.1.3
-rsa==4.9.1
-packaging==25.0
-draccus==0.8.0
-typing-inspection==0.4.1
-Markdown==3.8.2
-wandb==0.21.1
-trimesh==4.7.1
-Pygments==2.19.2
-pillow==11.3.0
-libclang==18.1.1
-typing-inspect==0.9.0
-attrs==25.3.0
-scipy==1.15.3
-scipy==1.11.2
-wrapt==1.14.1
-safetensors==0.6.2
-nvidia-curand-cu12==10.3.2.106
-etils==1.13.0
-OpenEXR==3.3.5
-smmap==5.0.2
-sentencepiece==0.1.99
-pyparsing==3.2.3
-astunparse==1.6.3
-opt_einsum==3.4.0
-tensorflow-graphics==2021.12.3
-fsspec==2025.7.0
-sympy==1.13.3
-timm==0.9.10
-pydantic==2.11.7
-tensorboard==2.15.2
-brotlicffi==1.0.9.2
-torch==2.2.0
-flatbuffers==25.2.10
-filelock==3.17.0
-click==8.2.1
-nvidia-cuda-cupti-cu12==12.1.105
-ninja==1.11.1.4
-typeguard==2.13.3
-nvidia-nccl-cu12==2.19.3
-openvla==0.0.3
-MarkupSafe==3.0.2
-rich==14.1.0
-nvidia-nvjitlink-cu12==12.9.86
-tensorflow-datasets==4.9.3
-tensorflow-io-gcs-filesystem==0.37.1
-networkx==3.4.2
-huggingface-hub==0.34.4
-absl-py==2.3.1
-nvidia-cublas-cu12==12.1.3.1
-torchaudio==2.2.0
-gmpy2==2.2.1
-array_record==0.7.2
-tensorflow-addons==0.23.0
-oauthlib==3.3.1
-PyYAML==6.0.2
-regex==2025.7.34
-nvidia-cuda-nvrtc-cu12==12.1.105
-setuptools==78.1.1
-toml==0.10.2
-google-auth==2.40.3
-certifi==2025.8.3
-keras==2.15.0
-torchvision==0.17.0
-grpcio==1.74.0
-fonttools==4.59.0
-transformers==4.40.1
-annotated-types==0.7.0
-charset-normalizer==3.3.2
-promise==2.3
-mergedeep==1.3.4
-gast==0.6.0
-cachetools==5.5.2
-termcolor==3.1.0
-pyyaml-include==1.4.1
-importlib_resources==6.5.2
-nvidia-cusolver-cu12==11.4.5.107
-h5py==3.14.0
-python-dateutil==2.9.0.post0
-peft==0.11.1
-urllib3==2.5.0
-einops==0.8.1
-tensorflow-estimator==2.15.0
-requests==2.32.4
-psutil==7.0.0
-requests-oauthlib==2.0.0
-pip==25.1
-markdown-it-py==3.0.0
-nvidia-cusparse-cu12==12.1.0.106
-idna==3.7
-tqdm==4.67.1
-dm-tree==0.1.9
-gitdb==4.0.12
-typing_extensions==4.12.2
-matplotlib==3.10.5
-accelerate==1.10.0
-tensorflow-metadata==1.17.2
-sentry-sdk==2.34.1
-jsonlines==4.0.0
-protobuf==4.21.12
-pyasn1==0.6.1
-google-pasta==0.2.0
-mpmath==1.3.0
-Jinja2==3.1.6
-tensorboard-data-server==0.7.2
-pydantic_core==2.33.2
-google-auth-oauthlib==1.2.2
-cycler==0.12.1
-platformdirs==4.3.8
-GitPython==3.1.45
-wheel==0.45.1
-backports.tarfile==1.2.0
-jaraco.collections==5.1.0
-autocommand==2.2.2
-typeguard==4.3.0
-tomli==2.0.1
-importlib_metadata==8.0.0
-platformdirs==4.2.2
-wheel==0.45.1
-more-itertools==10.3.0
-inflect==7.3.1
-jaraco.context==5.3.0
-typing_extensions==4.12.2
-jaraco.functools==4.0.1
-packaging==24.2
-zipp==3.19.2
-jaraco.text==3.12.1

wandb/run-20250809_074602-gpyuprau/files/wandb-metadata.json DELETED Viewed

@@ -1,128 +0,0 @@
-{
-  "os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.35",
-  "python": "CPython 3.10.18",
-  "startedAt": "2025-08-09T07:46:02.753034Z",
-  "args": [
-    "--model.type",
-    "prism-qwen25-extra-dinosiglip-224px+3b",
-    "--model.finetune_per_device_batch_size",
-    "4"
-  ],
-  "program": "/fsx/byungjun/openvla-mini/scripts/pretrain.py",
-  "codePath": "scripts/pretrain.py",
-  "codePathLocal": "scripts/pretrain.py",
-  "git": {
-    "remote": "https://github.com/happyhappy-jun/openvla-mini",
-    "commit": "a72e9ce42035282eb6c950204e50ef3c4fbb363d"
-  },
-  "email": "bjyoon513@gmail.com",
-  "root": "runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7",
-  "host": "compute-st-kait-gpu-2",
-  "executable": "/fsx/byungjun/miniconda3/envs/minivla/bin/python3.10",
-  "cpu_count": 96,
-  "cpu_count_logical": 192,
-  "gpu": "NVIDIA H200",
-  "gpu_count": 8,
-  "disk": {
-    "/": {
-      "total": "520120602624",
-      "used": "64348778496"
-    }
-  },
-  "memory": {
-    "total": "2147425312768"
-  },
-  "gpu_nvidia": [
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-95044091-c6a6-4e9d-26a3-0249feeaf796"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-e54a8b43-5dd9-a2f8-8a71-b254a12248ec"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-daed9c7c-6f35-ec0c-abd5-49e1f7d48645"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-acf2a7ee-d8a1-bb8c-be49-c7a07c0f07da"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-0245a021-19ca-991a-61b0-94cbc116d182"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-4213a83d-27d3-97d3-0cec-f9700637d48c"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-8be9f5c6-a214-8b33-0ac2-217892edfa6f"
-    },
-    {
-      "name": "NVIDIA H200",
-      "memoryTotal": "150754820096",
-      "cudaCores": 16896,
-      "architecture": "Hopper",
-      "uuid": "GPU-5c5fce07-faf7-1345-d5ea-4c13e75769e7"
-    }
-  ],
-  "cudaVersion": "12.8",
-  "slurm": {
-    "cluster_name": "kait-gpu-06-parallelcluster",
-    "conf": "/opt/slurm/etc/slurm.conf",
-    "cpus_on_node": "128",
-    "gpus": "8",
-    "gpus_on_node": "8",
-    "gtids": "0",
-    "job_cpus_per_node": "128",
-    "job_end_time": "1754984478",
-    "job_gid": "1004",
-    "job_gpus": "0,1,2,3,4,5,6,7",
-    "job_id": "531",
-    "job_name": "qwen_3b_vlm_finetune",
-    "job_nodelist": "compute-st-kait-gpu-2",
-    "job_num_nodes": "1",
-    "job_partition": "batch2",
-    "job_start_time": "1754725278",
-    "job_uid": "1004",
-    "job_user": "byungjun",
-    "jobid": "531",
-    "localid": "0",
-    "nnodes": "1",
-    "nodeid": "0",
-    "nodelist": "compute-st-kait-gpu-2",
-    "prio_process": "0",
-    "procid": "0",
-    "submit_dir": "/fsx/byungjun/openvla-mini",
-    "submit_host": "ip-10-10-47-245",
-    "task_pid": "511629",
-    "tasks_per_node": "128",
-    "topology_addr": "compute-st-kait-gpu-2",
-    "topology_addr_pattern": "node"
-  },
-  "writerId": "3taj0yyhnh9dhglq0763fsp13ob8f2ee"
-}

wandb/run-20250809_074602-gpyuprau/files/wandb-summary.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"Finetune/Step":10396,"Finetune/Loss":0.5836188793182373,"_wandb":{"runtime":23137},"_runtime":23137.460341213,"_step":10396,"Finetune/Learning Rate":0,"_timestamp":1.7547486698740156e+09,"Finetune/Step Time":2.2515620701014996,"Finetune/Loss (Raw)":0.6342841386795044}

wandb/run-20250809_074602-gpyuprau/logs/debug-core.log DELETED Viewed

@@ -1,16 +0,0 @@
-{"time":"2025-08-09T07:46:03.108885616Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmprujohv9o/port-511740.txt","pid":511740,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
-{"time":"2025-08-09T07:46:03.110958048Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":511740}
-{"time":"2025-08-09T07:46:03.110958202Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-511740-513590-3014293601/socket","Net":"unix"}}
-{"time":"2025-08-09T07:46:03.170319779Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
-{"time":"2025-08-09T07:46:03.190240086Z","level":"INFO","msg":"handleInformInit: received","streamId":"gpyuprau","id":"1(@)"}
-{"time":"2025-08-09T07:46:03.636533053Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"gpyuprau","id":"1(@)"}
-{"time":"2025-08-09T14:11:42.468517268Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"gpyuprau","id":"1(@)"}
-{"time":"2025-08-09T14:11:42.587479793Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"gpyuprau","id":"1(@)"}
-{"time":"2025-08-09T14:15:15.093597184Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
-{"time":"2025-08-09T14:15:15.093715872Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
-{"time":"2025-08-09T14:15:15.093729541Z","level":"INFO","msg":"server is shutting down"}
-{"time":"2025-08-09T14:15:15.093767704Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
-{"time":"2025-08-09T14:15:15.093829956Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-511740-513590-3014293601/socket","Net":"unix"}}
-{"time":"2025-08-09T14:15:15.09390158Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
-{"time":"2025-08-09T14:15:15.093983904Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
-{"time":"2025-08-09T14:15:15.094005344Z","level":"INFO","msg":"server is closed"}

wandb/run-20250809_074602-gpyuprau/logs/debug-internal.log DELETED Viewed

@@ -1,12 +0,0 @@
-{"time":"2025-08-09T07:46:03.192885763Z","level":"INFO","msg":"stream: starting","core version":"0.21.1"}
-{"time":"2025-08-09T07:46:03.636468673Z","level":"INFO","msg":"stream: created new stream","id":"gpyuprau"}
-{"time":"2025-08-09T07:46:03.636525114Z","level":"INFO","msg":"stream: started","id":"gpyuprau"}
-{"time":"2025-08-09T07:46:03.636549213Z","level":"INFO","msg":"writer: started","stream_id":"gpyuprau"}
-{"time":"2025-08-09T07:46:03.636570228Z","level":"INFO","msg":"sender: started","stream_id":"gpyuprau"}
-{"time":"2025-08-09T07:46:03.636595816Z","level":"INFO","msg":"handler: started","stream_id":"gpyuprau"}
-{"time":"2025-08-09T14:11:42.18522232Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-08-09T14:11:42.426873859Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading history steps 10395-10395, summary, console lines 1-3","runtime_seconds":0.229336216}],"total_operations":1}}
-{"time":"2025-08-09T14:11:42.471754339Z","level":"INFO","msg":"stream: closing","id":"gpyuprau"}
-{"time":"2025-08-09T14:11:42.471779588Z","level":"INFO","msg":"handler: closed","stream_id":"gpyuprau"}
-{"time":"2025-08-09T14:11:42.472617651Z","level":"INFO","msg":"sender: closed","stream_id":"gpyuprau"}
-{"time":"2025-08-09T14:11:42.472627835Z","level":"INFO","msg":"stream: closed","id":"gpyuprau"}

wandb/run-20250809_074602-gpyuprau/logs/debug.log DELETED Viewed

@@ -1,28 +0,0 @@
-2025-08-09 07:46:02,871 INFO    MainThread:511740 [wandb_setup.py:_flush():80] Current SDK version is 0.21.1
-2025-08-09 07:46:02,872 INFO    MainThread:511740 [wandb_setup.py:_flush():80] Configure stats pid to 511740
-2025-08-09 07:46:02,872 INFO    MainThread:511740 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/.config/wandb/settings
-2025-08-09 07:46:02,872 INFO    MainThread:511740 [wandb_setup.py:_flush():80] Loading settings from /fsx/byungjun/openvla-mini/wandb/settings
-2025-08-09 07:46:02,872 INFO    MainThread:511740 [wandb_setup.py:_flush():80] Loading settings from environment variables
-2025-08-09 07:46:02,872 INFO    MainThread:511740 [wandb_init.py:setup_run_log_directory():703] Logging user logs to runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7/wandb/run-20250809_074602-gpyuprau/logs/debug.log
-2025-08-09 07:46:02,872 INFO    MainThread:511740 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to runs/prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7/wandb/run-20250809_074602-gpyuprau/logs/debug-internal.log
-2025-08-09 07:46:02,872 INFO    MainThread:511740 [wandb_init.py:init():830] calling init triggers
-2025-08-09 07:46:02,872 INFO    MainThread:511740 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
-config: {'model': {'type': 'prism-qwen25-extra-dinosiglip-224px+3b', 'model_id': 'prism-qwen25-extra-dinosiglip-224px+3b', 'arch_specifier': 'no-align+fused-gelu-mlp', 'vision_backbone_id': 'dinosiglip-vit-so-224px', 'llm_backbone_id': 'qwen25-3b-extra', 'image_resize_strategy': 'resize-naive', 'llm_max_length': 32768, 'image_sequence_len': 1, 'align_epochs': 1, 'align_max_steps': None, 'align_save_every_n_steps': 10000, 'align_global_batch_size': 96, 'align_per_device_batch_size': 16, 'align_learning_rate': 0.001, 'align_weight_decay': 0.0, 'align_max_grad_norm': 1.0, 'align_lr_scheduler_type': 'linear-warmup+cosine-decay', 'align_warmup_ratio': 0.03, 'align_train_strategy': 'fsdp-shard-grad-op', 'finetune_epochs': 2, 'finetune_max_steps': None, 'finetune_save_every_n_steps': 10000, 'finetune_global_batch_size': 128, 'finetune_per_device_batch_size': 4, 'finetune_learning_rate': 2e-05, 'finetune_weight_decay': 0.1, 'finetune_max_grad_norm': 1.0, 'finetune_lr_scheduler_type': 'linear-warmup+cosine-decay', 'finetune_warmup_ratio': 0.03, 'finetune_train_strategy': 'fsdp-full-shard', 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': False}, 'dataset': {'type': 'llava-v15', 'dataset_id': 'llava-v15', 'align_stage_components': ['download/llava-laion-cc-sbu-558k/chat.json', 'download/llava-laion-cc-sbu-558k'], 'finetune_stage_components': ['download/llava-v1.5-instruct/llava_v1_5_mix665k.json', 'download/llava-v1.5-instruct'], 'dataset_root_dir': 'data2'}, 'stage': 'finetune', 'pretrained_checkpoint': None, 'run_id': 'prism-qwen25-extra-dinosiglip-224px+3b+stage-finetune+x7', 'run_root_dir': 'runs', 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'prismatic', 'wandb_entity': None, '_wandb': {}}
-2025-08-09 07:46:02,872 INFO    MainThread:511740 [wandb_init.py:init():871] starting backend
-2025-08-09 07:46:03,170 INFO    MainThread:511740 [wandb_init.py:init():874] sending inform_init request
-2025-08-09 07:46:03,188 INFO    MainThread:511740 [wandb_init.py:init():882] backend started and connected
-2025-08-09 07:46:03,192 INFO    MainThread:511740 [wandb_init.py:init():953] updated telemetry
-2025-08-09 07:46:03,263 INFO    MainThread:511740 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
-2025-08-09 07:46:03,946 INFO    MainThread:511740 [wandb_init.py:init():1029] starting run threads in backend
-2025-08-09 07:46:04,803 INFO    MainThread:511740 [wandb_run.py:_console_start():2494] atexit reg
-2025-08-09 07:46:04,803 INFO    MainThread:511740 [wandb_run.py:_redirect():2342] redirect: wrap_raw
-2025-08-09 07:46:04,803 INFO    MainThread:511740 [wandb_run.py:_redirect():2411] Wrapping output streams.
-2025-08-09 07:46:04,804 INFO    MainThread:511740 [wandb_run.py:_redirect():2434] Redirects installed.
-2025-08-09 07:46:04,823 INFO    MainThread:511740 [wandb_init.py:init():1075] run started, returning control to user process
-2025-08-09 14:11:41,401 INFO    MainThread:511740 [wandb_run.py:_finish():2260] finishing run happyhappy/prismatic/gpyuprau
-2025-08-09 14:11:41,406 INFO    MainThread:511740 [wandb_run.py:_atexit_cleanup():2459] got exitcode: 0
-2025-08-09 14:11:41,407 INFO    MainThread:511740 [wandb_run.py:_restore():2441] restore
-2025-08-09 14:11:41,407 INFO    MainThread:511740 [wandb_run.py:_restore():2447] restore done
-2025-08-09 14:11:42,461 INFO    MainThread:511740 [wandb_run.py:_footer_history_summary_info():3895] rendering history
-2025-08-09 14:11:42,466 INFO    MainThread:511740 [wandb_run.py:_footer_history_summary_info():3927] rendering summary
-2025-08-09 14:11:42,466 INFO    MainThread:511740 [wandb_run.py:_footer_sync_info():3856] logging synced files

wandb/run-20250809_074602-gpyuprau/run-gpyuprau.wandb DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:29c3269b94110e36b4df335f692afa5bd1810092f1d21e8fc12aee08f7e901fa
-size 16169201