htlou committed on May 8, 2025

Commit

9d89ed1

verified ·

1 Parent(s): ebbb000

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
arguments.yaml +10 -12
environ.txt +24 -15
script.sh +28 -26
slice_1945/added_tokens.json +3 -0
slice_1945/chat_template.json +3 -0
slice_1945/config.json +89 -0
slice_1945/preprocessor_config.json +52 -0
slice_1945/processor_config.json +7 -0
slice_1945/pytorch_model.bin +3 -0
slice_1945/special_tokens_map.json +31 -0
slice_1945/tokenizer.json +0 -0
slice_1945/tokenizer.model +3 -0
slice_1945/tokenizer_config.json +56 -0
slice_3890/added_tokens.json +3 -0
slice_3890/chat_template.json +3 -0
slice_3890/config.json +89 -0
slice_3890/preprocessor_config.json +52 -0
slice_3890/processor_config.json +7 -0
slice_3890/pytorch_model.bin +3 -0
slice_3890/special_tokens_map.json +31 -0
slice_3890/tokenizer.json +0 -0
slice_3890/tokenizer.model +3 -0
slice_3890/tokenizer_config.json +56 -0
slice_5835/added_tokens.json +3 -0
slice_5835/chat_template.json +3 -0
slice_5835/config.json +89 -0
slice_5835/preprocessor_config.json +52 -0
slice_5835/processor_config.json +7 -0
slice_5835/pytorch_model.bin +3 -0
slice_5835/special_tokens_map.json +31 -0
slice_5835/tokenizer.json +0 -0
slice_5835/tokenizer.model +3 -0
slice_5835/tokenizer_config.json +56 -0
slice_end/added_tokens.json +1 -2
slice_end/config.json +13 -9
slice_end/pytorch_model.bin +2 -2
slice_end/special_tokens_map.json +1 -1
slice_end/tokenizer.json +0 -0
slice_end/tokenizer.model +2 -2
slice_end/tokenizer_config.json +4 -17
wandb/debug-internal.log +16 -16
wandb/debug.log +30 -30
wandb/run-20250401_113714-csgd5etr/files/config.yaml +95 -0
wandb/run-20250401_113714-csgd5etr/files/output.log +0 -0
wandb/run-20250401_113714-csgd5etr/files/requirements.txt +258 -0
wandb/run-20250401_113714-csgd5etr/files/wandb-metadata.json +106 -0
wandb/run-20250401_113714-csgd5etr/files/wandb-summary.json +1 -0
wandb/run-20250401_113714-csgd5etr/logs/debug-core.log +14 -0
wandb/run-20250401_113714-csgd5etr/logs/debug-internal.log +16 -0

.gitattributes CHANGED Viewed

@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 wandb/run-20250331_180548-xid0f47f/run-xid0f47f.wandb filter=lfs diff=lfs merge=lfs -text
 wandb/run-20250329_055156-k1abll4c/run-k1abll4c.wandb filter=lfs diff=lfs merge=lfs -text
 wandb/run-20250330_120229-onfejhxh/run-onfejhxh.wandb filter=lfs diff=lfs merge=lfs -text

 wandb/run-20250331_180548-xid0f47f/run-xid0f47f.wandb filter=lfs diff=lfs merge=lfs -text
 wandb/run-20250329_055156-k1abll4c/run-k1abll4c.wandb filter=lfs diff=lfs merge=lfs -text
 wandb/run-20250330_120229-onfejhxh/run-onfejhxh.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250401_113714-csgd5etr/run-csgd5etr.wandb filter=lfs diff=lfs merge=lfs -text

arguments.yaml CHANGED Viewed

@@ -1,37 +1,34 @@
 data_cfgs:
   eval_data_files: {}
   eval_datasets: {}
-  eval_name: {}
   eval_optional_args: []
   eval_size: {}
   eval_split: {}
   eval_subset: {}
   eval_template: {}
-  load_multi_datasets: false
   train_data_files: {}
-  train_datasets: /aifs4su/yaodong/hantao/datasets/MMInstruct-GPT4V_mistral-7b_cosi_cut/merged/top1-40
   train_name: text-image-to-text
   train_optional_args: []
   train_size: {}
   train_split: train
-  train_template: MM_TI2T_LLAVA
 logger_cfgs:
   cache_dir: {}
   log_project: align-anything
-  log_run_name: sft
   log_type: wandb
-  output_dir: ../outputs/LLAVA_7B_cosi/top1-40
-  save_total_limit: 6
 model_cfgs:
   model_max_length: 4096
-  model_name_or_path: /aifs4su/yaodong/hantao/models/llava-v1.6-mistral-7b-hf
   trust_remote_code: true
 special_tokens: {}
 train_cfgs:
   adam_betas:
   - 0.9
   - 0.95
-  adam_epsilon: 1.0e-08
   bf16: true
   ds_cfgs: ds_z3_config.json
   epochs: 3
@@ -41,15 +38,16 @@ train_cfgs:
   freeze_language_model: false
   freeze_mm_proj: false
   freeze_vision_tower: true
-  gradient_accumulation_steps: 16
   gradient_checkpointing: true
-  learning_rate: 2.0e-05
   load_checkpoint: false
   lr_scheduler_type: cosine
   lr_warmup_ratio: 0.03
-  max_grad_norm: 1.0
   per_device_eval_batch_size: 1
   per_device_train_batch_size: 1
   save_checkpoint: false
   seed: 42
   weight_decay: 0.0

 data_cfgs:
   eval_data_files: {}
   eval_datasets: {}
   eval_optional_args: []
   eval_size: {}
   eval_split: {}
   eval_subset: {}
   eval_template: {}
   train_data_files: {}
+  train_datasets: /aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-40
   train_name: text-image-to-text
   train_optional_args: []
   train_size: {}
   train_split: train
+  train_template: AA_TI2T_LLAVA
 logger_cfgs:
   cache_dir: {}
   log_project: align-anything
+  log_run_name: dpo
   log_type: wandb
+  output_dir: ../outputs/llava_1.6_vicuna_7B_cosi/top1-40
+  save_total_limit: 3
 model_cfgs:
   model_max_length: 4096
+  model_name_or_path: /aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf
   trust_remote_code: true
 special_tokens: {}
 train_cfgs:
   adam_betas:
   - 0.9
   - 0.95
   bf16: true
   ds_cfgs: ds_z3_config.json
   epochs: 3
   freeze_language_model: false
   freeze_mm_proj: false
   freeze_vision_tower: true
+  gradient_accumulation_steps: 1
   gradient_checkpointing: true
+  learning_rate: 1.0e-06
   load_checkpoint: false
   lr_scheduler_type: cosine
   lr_warmup_ratio: 0.03
   per_device_eval_batch_size: 1
   per_device_train_batch_size: 1
+  regularization: 0.001
   save_checkpoint: false
+  scale_coeff: 0.1
   seed: 42
   weight_decay: 0.0

environ.txt CHANGED Viewed

@@ -81,13 +81,16 @@ BASH_FUNC_switchml%%=() {  typeset swfound=1;
  return 1;
  fi
 }
 BUILD=x86_64-conda-linux-gnu
 CC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
 CC_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
 CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include  -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include  -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
 CMAKE_ARGS=-DCMAKE_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ar -DCMAKE_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib -DCMAKE_LINKER=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld -DCMAKE_STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip -DCMAKE_BUILD_TYPE=Release
 CMAKE_PREFIX_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama:/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot/usr
 CMD_WLM_CLUSTER_NAME=slurm
 CONDA_BUILD_SYSROOT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot
 CONDA_DEFAULT_ENV=hantao_llama
 CONDA_EXE=/aifs4su/yaodong/miniconda3/bin/conda
@@ -109,6 +112,7 @@ CXXFILT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu
 CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include  -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include  -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
 CXX_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++
 DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1028/bus
 DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
 DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
 DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
@@ -118,6 +122,7 @@ GCC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc
 GCC_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ar
 GCC_NM=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-nm
 GCC_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ranlib
 GPROF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gprof
 GSETTINGS_SCHEMA_DIR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/share/glib-2.0/schemas
 GSETTINGS_SCHEMA_DIR_CONDA_BACKUP=
@@ -151,12 +156,11 @@ LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd
 MANPATH=/usr/mpi/gcc/openmpi-4.1.7a1/share/man:/cm/shared/apps/slurm/current/man:/cm/local/apps/environment-modules/4.5.3/share/man:/usr/local/man:/usr/local/share/man:/usr/share/man:/cm/local/apps/environment-modules/current/share/man:/cm/local/apps/environment-modules/current/share/man
 MANPATH_modshare=/usr/local/share/man:1:/usr/mpi/gcc/openmpi-4.1.7a1/share/man:1:/cm/local/apps/environment-modules/current/share/man:1:/cm/local/apps/environment-modules/4.5.3/share/man:1:/usr/local/man:1:/usr/share/man:1:/cm/shared/apps/slurm/current/man:1
 MASTER_ADDR=127.0.0.1
-MASTER_PORT=46590
 MIG_PARTED_CHECKPOINT_FILE=/var/lib/nvidia-mig-manager/checkpoint.json
 MIG_PARTED_CONFIG_FILE=/etc/nvidia-mig-manager/config.yaml
 MIG_PARTED_HOOKS_FILE=/etc/nvidia-mig-manager/hooks.yaml
 MODULEPATH=/cm/local/modulefiles:/cm/shared/modulefiles
-MODULEPATH_modshare=/cm/local/modulefiles:1:/cm/shared/modulefiles:3
 MODULESHOME=/cm/local/apps/environment-modules/4.5.3
 MODULES_CMD=/cm/local/apps/environment-modules/4.5.3/libexec/modulecmd.tcl
 MODULES_SET_SHELL_STARTUP=0
@@ -170,38 +174,43 @@ NVITOP_MONITOR_MODE=colorful
 OBJCOPY=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objcopy
 OBJDUMP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objdump
 OLDPWD=/home/yangyaodong
-PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin:/aifs4su/yaodong/miniconda3/condabin:/usr/mpi/gcc/openmpi-4.1.7a1/bin:/usr/lpp/mmfs/bin:/usr/local/cuda/bin:/opt/bin:/usr/lpp/mmfs/bin:/cm/shared/apps/slurm/current/sbin:/cm/shared/apps/slurm/current/bin:/usr/local/cuda/bin:/opt/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/sbin:/usr/sbin:/cm/local/apps/environment-modules/4.5.3/bin
-PATH_modshare=/usr/mpi/gcc/openmpi-4.1.7a1/bin:1:/opt/bin/:1:/usr/bin:1:/usr/local/bin:1:/cm/shared/apps/slurm/current/bin:1:/cm/shared/apps/slurm/current/sbin:1:/bin:1:/snap/bin:1:/sbin:1:/usr/sbin:1:/cm/local/apps/environment-modules/4.5.3/bin:1:/usr/games:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/usr/local/cuda/bin:1:/usr/local/games:1
 PWD=/aifs4su/yaodong/hantao/align-anything/scripts
 PYTHONHASHSEED=42
-PYTHONPATH=/aifs4su/yaodong/hantao/align-anything/scripts
-QT_QPA_FONTDIR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/cv2/qt/fonts
-QT_QPA_PLATFORM_PLUGIN_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/cv2/qt/plugins
 RANK=0
 RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib
 READELF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-readelf
 SHELL=/bin/bash
-SHLVL=3
 SIZE=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-size
 SLURM_CONF=/cm/shared/apps/slurm/var/etc/slurm/slurm.conf
-SSH_CLIENT=10.33.4.51 34180 22
-SSH_CONNECTION=10.33.4.51 34180 10.33.4.231 22
-SSH_TTY=/dev/pts/8
 STRINGS=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strings
 STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip
 TERM=screen
 TERM_PROGRAM=tmux
 TERM_PROGRAM_VERSION=3.2a
-TMUX=/tmp/tmux-1028/default,1588807,0
-TMUX_PANE=%0
 USER=yangyaodong
 WANDB_API_KEY=7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33
-WANDB_SERVICE=2-1593022-tcp-localhost-46867
 WORLD_SIZE=8
 XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
 XDG_RUNTIME_DIR=/run/user/1028
 XDG_SESSION_CLASS=user
-XDG_SESSION_ID=1733
 XDG_SESSION_TYPE=tty
 ZERO_STAGE=3
 _=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/deepspeed

  return 1;
  fi
 }
+BROWSER=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/bin/helpers/browser.sh
 BUILD=x86_64-conda-linux-gnu
+BUNDLED_DEBUGPY_PATH=/home/yangyaodong/.vscode-server/extensions/ms-python.debugpy-2025.0.1-linux-x64/bundled/libs/debugpy
 CC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
 CC_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
 CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include  -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include  -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
 CMAKE_ARGS=-DCMAKE_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ar -DCMAKE_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib -DCMAKE_LINKER=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld -DCMAKE_STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip -DCMAKE_BUILD_TYPE=Release
 CMAKE_PREFIX_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama:/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot/usr
 CMD_WLM_CLUSTER_NAME=slurm
+COLORTERM=truecolor
 CONDA_BUILD_SYSROOT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot
 CONDA_DEFAULT_ENV=hantao_llama
 CONDA_EXE=/aifs4su/yaodong/miniconda3/bin/conda
 CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include  -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include  -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
 CXX_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++
 DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1028/bus
+DEBUGPY_ADAPTER_ENDPOINTS=/home/yangyaodong/.vscode-server/extensions/ms-python.debugpy-2025.0.1-linux-x64/.noConfigDebugAdapterEndpoints/endpoint-cf2a8fd1c0b5bb2d.txt
 DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
 DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
 DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
 GCC_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ar
 GCC_NM=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-nm
 GCC_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ranlib
+GIT_ASKPASS=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/extensions/git/dist/askpass.sh
 GPROF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gprof
 GSETTINGS_SCHEMA_DIR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/share/glib-2.0/schemas
 GSETTINGS_SCHEMA_DIR_CONDA_BACKUP=
 MANPATH=/usr/mpi/gcc/openmpi-4.1.7a1/share/man:/cm/shared/apps/slurm/current/man:/cm/local/apps/environment-modules/4.5.3/share/man:/usr/local/man:/usr/local/share/man:/usr/share/man:/cm/local/apps/environment-modules/current/share/man:/cm/local/apps/environment-modules/current/share/man
 MANPATH_modshare=/usr/local/share/man:1:/usr/mpi/gcc/openmpi-4.1.7a1/share/man:1:/cm/local/apps/environment-modules/current/share/man:1:/cm/local/apps/environment-modules/4.5.3/share/man:1:/usr/local/man:1:/usr/share/man:1:/cm/shared/apps/slurm/current/man:1
 MASTER_ADDR=127.0.0.1
+MASTER_PORT=19730
 MIG_PARTED_CHECKPOINT_FILE=/var/lib/nvidia-mig-manager/checkpoint.json
 MIG_PARTED_CONFIG_FILE=/etc/nvidia-mig-manager/config.yaml
 MIG_PARTED_HOOKS_FILE=/etc/nvidia-mig-manager/hooks.yaml
 MODULEPATH=/cm/local/modulefiles:/cm/shared/modulefiles
 MODULESHOME=/cm/local/apps/environment-modules/4.5.3
 MODULES_CMD=/cm/local/apps/environment-modules/4.5.3/libexec/modulecmd.tcl
 MODULES_SET_SHELL_STARTUP=0
 OBJCOPY=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objcopy
 OBJDUMP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objdump
 OLDPWD=/home/yangyaodong
+PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin:/usr/lpp/mmfs/bin:/usr/local/cuda/bin:/opt/bin:/usr/lpp/mmfs/bin:/cm/shared/apps/slurm/current/sbin:/cm/shared/apps/slurm/current/bin:/usr/local/cuda/bin:/opt/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/sbin:/usr/sbin:/cm/local/apps/environment-modules/4.5.3/bin
+PATH_modshare=/usr/mpi/gcc/openmpi-4.1.7a1/bin:1:/opt/bin/:1:/usr/bin:1:/usr/local/bin:1:/cm/shared/apps/slurm/current/bin:1:/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/bin/remote-cli:1:/cm/shared/apps/slurm/current/sbin:1:/bin:1:/snap/bin:1:/sbin:1:/home/yangyaodong/.vscode-server/data/User/globalStorage/github.copilot-chat/debugCommand:1:/home/yangyaodong/.vscode-server/extensions/ms-python.debugpy-2025.0.1-linux-x64/bundled/scripts/noConfigScripts:1:/usr/sbin:1:/usr/games:1:/cm/local/apps/environment-modules/4.5.3/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/usr/local/cuda/bin:1:/usr/local/games:1
 PWD=/aifs4su/yaodong/hantao/align-anything/scripts
+PYDEVD_DISABLE_FILE_VALIDATION=1
 PYTHONHASHSEED=42
+PYTHONPATH=/aifs4su/yaodong/hantao/align-anything/scripts:/aifs4su/yaodong/hantao/align-anything/scripts
 RANK=0
 RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib
 READELF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-readelf
 SHELL=/bin/bash
+SHLVL=4
 SIZE=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-size
 SLURM_CONF=/cm/shared/apps/slurm/var/etc/slurm/slurm.conf
+SSH_CLIENT=10.33.4.51 46666 22
+SSH_CONNECTION=10.33.4.230 40638 10.33.4.213 22
+SSL_CERT_DIR=/usr/lib/ssl/certs
+SSL_CERT_FILE=/usr/lib/ssl/certs/ca-certificates.crt
 STRINGS=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strings
 STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip
 TERM=screen
 TERM_PROGRAM=tmux
 TERM_PROGRAM_VERSION=3.2a
+TMUX=/tmp/tmux-1028/default,2296743,10
+TMUX_PANE=%25
 USER=yangyaodong
+VSCODE_GIT_ASKPASS_EXTRA_ARGS=
+VSCODE_GIT_ASKPASS_MAIN=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/extensions/git/dist/askpass-main.js
+VSCODE_GIT_ASKPASS_NODE=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/node
+VSCODE_GIT_IPC_HANDLE=/run/user/1028/vscode-git-bbbbf321f6.sock
+VSCODE_IPC_HOOK_CLI=/run/user/1028/vscode-ipc-e2edf668-dca9-4331-a6ac-7d4507f653ce.sock
 WANDB_API_KEY=7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33
+WANDB_SERVICE=2-618398-tcp-localhost-37095
 WORLD_SIZE=8
 XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
 XDG_RUNTIME_DIR=/run/user/1028
 XDG_SESSION_CLASS=user
+XDG_SESSION_ID=43255
 XDG_SESSION_TYPE=tty
 ZERO_STAGE=3
 _=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/deepspeed

script.sh CHANGED Viewed

@@ -15,35 +15,37 @@
 # limitations under the License.
 # ==============================================================================
-DATASETS_NAME=("top1-40")
-MODEL_NAME_OR_PATH="/aifs4su/yaodong/hantao/models/llava-v1.6-mistral-7b-hf" # model path
-for DATASET_NAME in "${DATASETS_NAME[@]}"; do
-    TRAIN_DATASETS="/aifs4su/yaodong/hantao/datasets/MMInstruct-GPT4V_mistral-7b_cosi_cut/merged/${DATASET_NAME}" # dataset path
-    TRAIN_TEMPLATE="MM_TI2T_LLAVA" # dataset template
     TRAIN_NAME="text-image-to-text" # dataset name
     TRAIN_SPLIT="train" # split the dataset
-    OUTPUT_DIR="../outputs/LLAVA_7B_cosi/${DATASET_NAME}" # output dir
-     # For wandb online logging
-     export WANDB_API_KEY="7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33"
-     # Source the setup script
-     source ./setup.sh
-     # Execute deepspeed command
-     deepspeed \
-          --master_port ${MASTER_PORT} \
-          --module align_anything.trainers.text_image_to_text.sft \
-          --model_name_or_path ${MODEL_NAME_OR_PATH} \
-          --train_datasets ${TRAIN_DATASETS} \
-          --train_template ${TRAIN_TEMPLATE} \
-          --train_split ${TRAIN_SPLIT} \
-          --train_name ${TRAIN_NAME} \
-          --output_dir ${OUTPUT_DIR} \
-          --save_total_limit 6 \
-          --train_batch_size 16 \
-          --epochs 3
 done

 # limitations under the License.
 # ==============================================================================
+DATASETS_NAME=("top1-20" "top1-40")
+MODEL_NAME_OR_PATH="/aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf" # model path
+# HOSTFILE="/aifs4su/yaodong/hantao/align-anything/scripts/.hostfile"
+for DATASET_NAME in ${DATASETS_NAME[@]}; do
+    TRAIN_DATASETS="/aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/${DATASET_NAME}" # dataset path
+    TRAIN_TEMPLATE="AA_TI2T_LLAVA" # dataset template
     TRAIN_NAME="text-image-to-text" # dataset name
     TRAIN_SPLIT="train" # split the dataset
+    OUTPUT_DIR="../outputs/llava_1.6_vicuna_7B_cosi/${DATASET_NAME}" # output dir
+    # For wandb online logging
+    export WANDB_API_KEY="7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33"
+    # Source the setup script
+    source ./setup.sh
+    # Execute deepspeed command
+    deepspeed \
+        --master_port ${MASTER_PORT} \
+        --module align_anything.trainers.text_image_to_text.dpo \
+        --model_name_or_path ${MODEL_NAME_OR_PATH} \
+        --train_datasets ${TRAIN_DATASETS} \
+        --train_template ${TRAIN_TEMPLATE} \
+        --train_split ${TRAIN_SPLIT} \
+        --train_name ${TRAIN_NAME} \
+        --output_dir ${OUTPUT_DIR} \
+        --save_total_limit 3 \
+        --train_batch_size 8 \
+        --epochs 3
 done

slice_1945/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<image>": 32000
+}

slice_1945/chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"
+}

slice_1945/config.json ADDED Viewed

	@@ -0,0 +1,89 @@

+{
+  "_attn_implementation_autoset": true,
+  "architectures": [
+    "LlavaNextForConditionalGeneration"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "ignore_index": -100,
+  "image_grid_pinpoints": [
+    [
+      336,
+      672
+    ],
+    [
+      672,
+      336
+    ],
+    [
+      672,
+      672
+    ],
+    [
+      1008,
+      336
+    ],
+    [
+      336,
+      1008
+    ]
+  ],
+  "image_seq_length": 576,
+  "image_token_index": 32000,
+  "model_type": "llava_next",
+  "multimodal_projector_bias": true,
+  "pad_token_id": 0,
+  "projector_hidden_act": "gelu",
+  "text_config": {
+    "_name_or_path": "lmsys/vicuna-7b-v1.5",
+    "architectures": [
+      "LlamaForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 11008,
+    "max_position_embeddings": 4096,
+    "mlp_bias": false,
+    "model_type": "llama",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 32,
+    "num_key_value_heads": 32,
+    "pad_token_id": 0,
+    "pretraining_tp": 1,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": null,
+    "rope_theta": 10000.0,
+    "torch_dtype": "bfloat16",
+    "use_cache": true,
+    "vocab_size": 32064
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.50.0",
+  "use_image_newline_parameter": true,
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 1024,
+    "image_size": 336,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "layer_norm_eps": 1e-05,
+    "model_type": "clip_vision_model",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "patch_size": 14,
+    "projection_dim": 768,
+    "torch_dtype": "bfloat16",
+    "vocab_size": 32000
+  },
+  "vision_feature_layer": -2,
+  "vision_feature_select_strategy": "default",
+  "vocab_size": 32064
+}

slice_1945/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "aspect_ratio_setting": "anyres",
+  "crop_size": {
+    "height": 336,
+    "width": 336
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_pad": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_grid_pinpoints": [
+    [
+      336,
+      672
+    ],
+    [
+      672,
+      336
+    ],
+    [
+      672,
+      672
+    ],
+    [
+      1008,
+      336
+    ],
+    [
+      336,
+      1008
+    ]
+  ],
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "LlavaNextImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "processor_class": "LlavaNextProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "shortest_edge": 336
+  }
+}

slice_1945/processor_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "image_token": "<image>",
+  "num_additional_image_tokens": 1,
+  "patch_size": 14,
+  "processor_class": "LlavaNextProcessor",
+  "vision_feature_select_strategy": "default"
+}

slice_1945/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12d69c0581c978e62733877edc6e713c184a175a4f5401962537c55d13a1bfc9
+size 14127100866

slice_1945/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image>",
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

slice_1945/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

slice_1945/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

slice_1945/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<image>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {
+    "image_token": "<image>"
+  },
+  "image_token": "<image>",
+  "legacy": false,
+  "model_max_length": 4096,
+  "pad_token": "<unk>",
+  "padding_side": "left",
+  "processor_class": "LlavaNextProcessor",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

slice_3890/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<image>": 32000
+}

slice_3890/chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"
+}

slice_3890/config.json ADDED Viewed

	@@ -0,0 +1,89 @@

+{
+  "_attn_implementation_autoset": true,
+  "architectures": [
+    "LlavaNextForConditionalGeneration"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "ignore_index": -100,
+  "image_grid_pinpoints": [
+    [
+      336,
+      672
+    ],
+    [
+      672,
+      336
+    ],
+    [
+      672,
+      672
+    ],
+    [
+      1008,
+      336
+    ],
+    [
+      336,
+      1008
+    ]
+  ],
+  "image_seq_length": 576,
+  "image_token_index": 32000,
+  "model_type": "llava_next",
+  "multimodal_projector_bias": true,
+  "pad_token_id": 0,
+  "projector_hidden_act": "gelu",
+  "text_config": {
+    "_name_or_path": "lmsys/vicuna-7b-v1.5",
+    "architectures": [
+      "LlamaForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 11008,
+    "max_position_embeddings": 4096,
+    "mlp_bias": false,
+    "model_type": "llama",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 32,
+    "num_key_value_heads": 32,
+    "pad_token_id": 0,
+    "pretraining_tp": 1,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": null,
+    "rope_theta": 10000.0,
+    "torch_dtype": "bfloat16",
+    "use_cache": true,
+    "vocab_size": 32064
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.50.0",
+  "use_image_newline_parameter": true,
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 1024,
+    "image_size": 336,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "layer_norm_eps": 1e-05,
+    "model_type": "clip_vision_model",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "patch_size": 14,
+    "projection_dim": 768,
+    "torch_dtype": "bfloat16",
+    "vocab_size": 32000
+  },
+  "vision_feature_layer": -2,
+  "vision_feature_select_strategy": "default",
+  "vocab_size": 32064
+}

slice_3890/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "aspect_ratio_setting": "anyres",
+  "crop_size": {
+    "height": 336,
+    "width": 336
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_pad": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_grid_pinpoints": [
+    [
+      336,
+      672
+    ],
+    [
+      672,
+      336
+    ],
+    [
+      672,
+      672
+    ],
+    [
+      1008,
+      336
+    ],
+    [
+      336,
+      1008
+    ]
+  ],
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "LlavaNextImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "processor_class": "LlavaNextProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "shortest_edge": 336
+  }
+}

slice_3890/processor_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "image_token": "<image>",
+  "num_additional_image_tokens": 1,
+  "patch_size": 14,
+  "processor_class": "LlavaNextProcessor",
+  "vision_feature_select_strategy": "default"
+}

slice_3890/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3536025677626475a7e77d8676491c7fc8637e77ad72486bac71a55de7a88c
+size 14127100866

slice_3890/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image>",
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

slice_3890/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

slice_3890/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

slice_3890/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<image>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {
+    "image_token": "<image>"
+  },
+  "image_token": "<image>",
+  "legacy": false,
+  "model_max_length": 4096,
+  "pad_token": "<unk>",
+  "padding_side": "left",
+  "processor_class": "LlavaNextProcessor",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

slice_5835/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<image>": 32000
+}

slice_5835/chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"
+}

slice_5835/config.json ADDED Viewed

	@@ -0,0 +1,89 @@

+{
+  "_attn_implementation_autoset": true,
+  "architectures": [
+    "LlavaNextForConditionalGeneration"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "ignore_index": -100,
+  "image_grid_pinpoints": [
+    [
+      336,
+      672
+    ],
+    [
+      672,
+      336
+    ],
+    [
+      672,
+      672
+    ],
+    [
+      1008,
+      336
+    ],
+    [
+      336,
+      1008
+    ]
+  ],
+  "image_seq_length": 576,
+  "image_token_index": 32000,
+  "model_type": "llava_next",
+  "multimodal_projector_bias": true,
+  "pad_token_id": 0,
+  "projector_hidden_act": "gelu",
+  "text_config": {
+    "_name_or_path": "lmsys/vicuna-7b-v1.5",
+    "architectures": [
+      "LlamaForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 11008,
+    "max_position_embeddings": 4096,
+    "mlp_bias": false,
+    "model_type": "llama",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 32,
+    "num_key_value_heads": 32,
+    "pad_token_id": 0,
+    "pretraining_tp": 1,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": null,
+    "rope_theta": 10000.0,
+    "torch_dtype": "bfloat16",
+    "use_cache": true,
+    "vocab_size": 32064
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.50.0",
+  "use_image_newline_parameter": true,
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 1024,
+    "image_size": 336,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "layer_norm_eps": 1e-05,
+    "model_type": "clip_vision_model",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "patch_size": 14,
+    "projection_dim": 768,
+    "torch_dtype": "bfloat16",
+    "vocab_size": 32000
+  },
+  "vision_feature_layer": -2,
+  "vision_feature_select_strategy": "default",
+  "vocab_size": 32064
+}

slice_5835/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "aspect_ratio_setting": "anyres",
+  "crop_size": {
+    "height": 336,
+    "width": 336
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_pad": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_grid_pinpoints": [
+    [
+      336,
+      672
+    ],
+    [
+      672,
+      336
+    ],
+    [
+      672,
+      672
+    ],
+    [
+      1008,
+      336
+    ],
+    [
+      336,
+      1008
+    ]
+  ],
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "LlavaNextImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "processor_class": "LlavaNextProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "shortest_edge": 336
+  }
+}

slice_5835/processor_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "image_token": "<image>",
+  "num_additional_image_tokens": 1,
+  "patch_size": 14,
+  "processor_class": "LlavaNextProcessor",
+  "vision_feature_select_strategy": "default"
+}

slice_5835/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:561e2931a4e46eba59940f06c5cba65d9ecc54224dbfe088c7c13c7e985f962e
+size 14127100866

slice_5835/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image>",
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

slice_5835/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

slice_5835/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

slice_5835/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<image>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {
+    "image_token": "<image>"
+  },
+  "image_token": "<image>",
+  "legacy": false,
+  "model_max_length": 4096,
+  "pad_token": "<unk>",
+  "padding_side": "left",
+  "processor_class": "LlavaNextProcessor",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

slice_end/added_tokens.json CHANGED Viewed

@@ -1,4 +1,3 @@
 {
-  "<image>": 32000,
-  "<pad>": 32001
 }

 {
+  "<image>": 32000
 }

slice_end/config.json CHANGED Viewed

@@ -32,27 +32,31 @@
   "image_token_index": 32000,
   "model_type": "llava_next",
   "multimodal_projector_bias": true,
-  "pad_token_id": 32001,
   "projector_hidden_act": "gelu",
   "text_config": {
-    "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
     "architectures": [
-      "MistralForCausalLM"
     ],
     "attention_dropout": 0.0,
     "head_dim": 128,
     "hidden_act": "silu",
     "hidden_size": 4096,
     "initializer_range": 0.02,
-    "intermediate_size": 14336,
-    "max_position_embeddings": 32768,
-    "model_type": "mistral",
     "num_attention_heads": 32,
     "num_hidden_layers": 32,
-    "num_key_value_heads": 8,
     "rms_norm_eps": 1e-05,
-    "rope_theta": 1000000.0,
-    "sliding_window": null,
     "torch_dtype": "bfloat16",
     "use_cache": true,
     "vocab_size": 32064

   "image_token_index": 32000,
   "model_type": "llava_next",
   "multimodal_projector_bias": true,
+  "pad_token_id": 0,
   "projector_hidden_act": "gelu",
   "text_config": {
+    "_name_or_path": "lmsys/vicuna-7b-v1.5",
     "architectures": [
+      "LlamaForCausalLM"
     ],
+    "attention_bias": false,
     "attention_dropout": 0.0,
     "head_dim": 128,
     "hidden_act": "silu",
     "hidden_size": 4096,
     "initializer_range": 0.02,
+    "intermediate_size": 11008,
+    "max_position_embeddings": 4096,
+    "mlp_bias": false,
+    "model_type": "llama",
     "num_attention_heads": 32,
     "num_hidden_layers": 32,
+    "num_key_value_heads": 32,
+    "pad_token_id": 0,
+    "pretraining_tp": 1,
     "rms_norm_eps": 1e-05,
+    "rope_scaling": null,
+    "rope_theta": 10000.0,
     "torch_dtype": "bfloat16",
     "use_cache": true,
     "vocab_size": 32064

slice_end/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10aecfb74b294ff71cad7db9a38b54042f26f379a4339cb36323dcb353204ef5
-size 15133733934

 version https://git-lfs.github.com/spec/v1
+oid sha256:561e2931a4e46eba59940f06c5cba65d9ecc54224dbfe088c7c13c7e985f962e
+size 14127100866

slice_end/special_tokens_map.json CHANGED Viewed

@@ -15,7 +15,7 @@
   },
   "image_token": "<image>",
   "pad_token": {
-    "content": "<pad>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

   },
   "image_token": "<image>",
   "pad_token": {
+    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

slice_end/tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

slice_end/tokenizer.model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
-size 493443

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

slice_end/tokenizer_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
-  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -34,32 +34,19 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "32001": {
-      "content": "<pad>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
-  "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "extra_special_tokens": {
     "image_token": "<image>"
   },
   "image_token": "<image>",
-  "legacy": true,
-  "max_length": null,
   "model_max_length": 4096,
-  "pad_to_multiple_of": null,
-  "pad_token": "<pad>",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
   "processor_class": "LlavaNextProcessor",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,

 {
   "add_bos_token": true,
   "add_eos_token": false,
+  "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "extra_special_tokens": {
     "image_token": "<image>"
   },
   "image_token": "<image>",
+  "legacy": false,
   "model_max_length": 4096,
+  "pad_token": "<unk>",
+  "padding_side": "left",
   "processor_class": "LlavaNextProcessor",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,

wandb/debug-internal.log CHANGED Viewed

@@ -1,16 +1,16 @@
-{"time":"2025-03-30T12:02:29.50658324+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"../outputs/LLAVA_7B_cosi/top1-40/wandb/run-20250330_120229-onfejhxh/logs/debug-core.log"}
-{"time":"2025-03-30T12:02:29.721137173+08:00","level":"INFO","msg":"created new stream","id":"onfejhxh"}
-{"time":"2025-03-30T12:02:29.721199348+08:00","level":"INFO","msg":"stream: started","id":"onfejhxh"}
-{"time":"2025-03-30T12:02:29.72127821+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"onfejhxh"}
-{"time":"2025-03-30T12:02:29.721279385+08:00","level":"INFO","msg":"handler: started","stream_id":"onfejhxh"}
-{"time":"2025-03-30T12:02:29.721285552+08:00","level":"INFO","msg":"sender: started","stream_id":"onfejhxh"}
-{"time":"2025-03-30T12:02:30.085144907+08:00","level":"INFO","msg":"Starting system monitor"}
-{"time":"2025-03-30T20:30:48.708400537+08:00","level":"INFO","msg":"Stopping system monitor"}
-{"time":"2025-03-30T20:30:48.715166297+08:00","level":"INFO","msg":"Stopped system monitor"}
-{"time":"2025-03-30T20:30:49.889450292+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
-{"time":"2025-03-30T20:30:50.141224282+08:00","level":"INFO","msg":"stream: closing","id":"onfejhxh"}
-{"time":"2025-03-30T20:30:50.141244264+08:00","level":"WARN","msg":"sender: received Exit record more than once, ignoring"}
-{"time":"2025-03-30T20:30:50.141291109+08:00","level":"INFO","msg":"sender: closed","stream_id":"onfejhxh"}
-{"time":"2025-03-30T20:30:50.14124957+08:00","level":"INFO","msg":"handler: closed","stream_id":"onfejhxh"}
-{"time":"2025-03-30T20:30:50.141257294+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"onfejhxh"}
-{"time":"2025-03-30T20:30:50.141607946+08:00","level":"INFO","msg":"stream: closed","id":"onfejhxh"}

+{"time":"2025-04-01T11:37:14.208599931+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"../outputs/llava_1.6_vicuna_7B_cosi/top1-40/wandb/run-20250401_113714-csgd5etr/logs/debug-core.log"}
+{"time":"2025-04-01T11:37:14.524748736+08:00","level":"INFO","msg":"created new stream","id":"csgd5etr"}
+{"time":"2025-04-01T11:37:14.524801572+08:00","level":"INFO","msg":"stream: started","id":"csgd5etr"}
+{"time":"2025-04-01T11:37:14.524825793+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"csgd5etr"}
+{"time":"2025-04-01T11:37:14.524831464+08:00","level":"INFO","msg":"handler: started","stream_id":"csgd5etr"}
+{"time":"2025-04-01T11:37:14.524868464+08:00","level":"INFO","msg":"sender: started","stream_id":"csgd5etr"}
+{"time":"2025-04-01T11:37:14.908466405+08:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-04-01T15:16:09.414058626+08:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-04-01T15:16:09.421777269+08:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-04-01T15:16:10.873890429+08:00","level":"INFO","msg":"stream: closing","id":"csgd5etr"}
+{"time":"2025-04-01T15:16:10.873923324+08:00","level":"WARN","msg":"sender: received Exit record more than once, ignoring"}
+{"time":"2025-04-01T15:16:11.529727104+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-04-01T15:16:11.763778385+08:00","level":"INFO","msg":"handler: closed","stream_id":"csgd5etr"}
+{"time":"2025-04-01T15:16:11.763804921+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"csgd5etr"}
+{"time":"2025-04-01T15:16:11.763934707+08:00","level":"INFO","msg":"sender: closed","stream_id":"csgd5etr"}
+{"time":"2025-04-01T15:16:11.76416327+08:00","level":"INFO","msg":"stream: closed","id":"csgd5etr"}

wandb/debug.log CHANGED Viewed

@@ -1,32 +1,32 @@
-2025-03-30 12:02:29,496 INFO    MainThread:1593022 [wandb_setup.py:_flush():67] Current SDK version is 0.19.8
-2025-03-30 12:02:29,497 INFO    MainThread:1593022 [wandb_setup.py:_flush():67] Configure stats pid to 1593022
-2025-03-30 12:02:29,497 INFO    MainThread:1593022 [wandb_setup.py:_flush():67] Loading settings from /home/yangyaodong/.config/wandb/settings
-2025-03-30 12:02:29,497 INFO    MainThread:1593022 [wandb_setup.py:_flush():67] Loading settings from /aifs4su/yaodong/hantao/align-anything/scripts/wandb/settings
-2025-03-30 12:02:29,497 INFO    MainThread:1593022 [wandb_setup.py:_flush():67] Loading settings from environment variables
-2025-03-30 12:02:29,497 INFO    MainThread:1593022 [wandb_init.py:setup_run_log_directory():647] Logging user logs to ../outputs/LLAVA_7B_cosi/top1-40/wandb/run-20250330_120229-onfejhxh/logs/debug.log
-2025-03-30 12:02:29,497 INFO    MainThread:1593022 [wandb_init.py:setup_run_log_directory():648] Logging internal logs to ../outputs/LLAVA_7B_cosi/top1-40/wandb/run-20250330_120229-onfejhxh/logs/debug-internal.log
-2025-03-30 12:02:29,497 INFO    MainThread:1593022 [wandb_init.py:init():761] calling init triggers
-2025-03-30 12:02:29,497 INFO    MainThread:1593022 [wandb_init.py:init():766] wandb.init called with sweep_config: {}
-config: {'train_cfgs': {'save_checkpoint': False, 'load_checkpoint': False, 'ds_cfgs': 'ds_z3_config.json', 'epochs': 3, 'seed': 42, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 16, 'gradient_checkpointing': True, 'learning_rate': 2e-05, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.0, 'adam_betas': [0.9, 0.95], 'adam_epsilon': 1e-08, 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'freeze_mm_proj': False, 'freeze_vision_tower': True, 'freeze_language_model': False, 'max_grad_norm': 1.0}, 'data_cfgs': {'load_multi_datasets': False, 'train_datasets': '/aifs4su/yaodong/hantao/datasets/MMInstruct-GPT4V_mistral-7b_cosi_cut/merged/top1-40', 'train_template': 'MM_TI2T_LLAVA', 'train_size': {}, 'train_split': 'train', 'train_name': 'text-image-to-text', 'train_data_files': {}, 'train_optional_args': [], 'eval_datasets': {}, 'eval_template': {}, 'eval_name': {}, 'eval_size': {}, 'eval_split': {}, 'eval_subset': {}, 'eval_data_files': {}, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'sft', 'output_dir': '../outputs/LLAVA_7B_cosi/top1-40', 'cache_dir': {}, 'save_total_limit': 6}, 'model_cfgs': {'model_name_or_path': '/aifs4su/yaodong/hantao/models/llava-v1.6-mistral-7b-hf', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': {}, '_wandb': {}}
-2025-03-30 12:02:29,497 INFO    MainThread:1593022 [wandb_init.py:init():784] starting backend
-2025-03-30 12:02:29,497 INFO    MainThread:1593022 [wandb_init.py:init():788] sending inform_init request
-2025-03-30 12:02:29,502 INFO    MainThread:1593022 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2025-03-30 12:02:29,503 INFO    MainThread:1593022 [wandb_init.py:init():798] backend started and connected
-2025-03-30 12:02:29,504 INFO    MainThread:1593022 [wandb_init.py:init():891] updated telemetry
-2025-03-30 12:02:29,540 INFO    MainThread:1593022 [wandb_init.py:init():915] communicating run to backend with 90.0 second timeout
-2025-03-30 12:02:30,082 INFO    MainThread:1593022 [wandb_init.py:init():990] starting run threads in backend
-2025-03-30 12:02:30,337 INFO    MainThread:1593022 [wandb_run.py:_console_start():2375] atexit reg
-2025-03-30 12:02:30,337 INFO    MainThread:1593022 [wandb_run.py:_redirect():2227] redirect: wrap_raw
-2025-03-30 12:02:30,337 INFO    MainThread:1593022 [wandb_run.py:_redirect():2292] Wrapping output streams.
-2025-03-30 12:02:30,337 INFO    MainThread:1593022 [wandb_run.py:_redirect():2315] Redirects installed.
-2025-03-30 12:02:30,342 INFO    MainThread:1593022 [wandb_init.py:init():1032] run started, returning control to user process
-2025-03-30 20:30:48,665 INFO    MainThread:1593022 [wandb_run.py:_finish():2112] finishing run htlou/align-anything/onfejhxh
-2025-03-30 20:30:48,666 INFO    MainThread:1593022 [wandb_run.py:_atexit_cleanup():2340] got exitcode: 0
-2025-03-30 20:30:48,666 INFO    MainThread:1593022 [wandb_run.py:_restore():2322] restore
-2025-03-30 20:30:48,666 INFO    MainThread:1593022 [wandb_run.py:_restore():2328] restore done
-2025-03-30 20:30:49,667 INFO    MainThread:1593022 [wandb_run.py:_restore():2322] restore
-2025-03-30 20:30:49,667 INFO    MainThread:1593022 [wandb_run.py:_restore():2328] restore done
-2025-03-30 20:30:49,667 ERROR   MainThread:1593022 [wandb_run.py:_atexit_cleanup():2361] Problem finishing run
 Traceback (most recent call last):
   File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2352, in _atexit_cleanup
     self._on_finish()
@@ -44,4 +44,4 @@ Traceback (most recent call last):
   File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/concurrent/futures/thread.py", line 169, in submit
     raise RuntimeError('cannot schedule new futures after '
 RuntimeError: cannot schedule new futures after interpreter shutdown
-2025-03-30 20:30:50,140 INFO    MsgRouterThr:1593022 [mailbox.py:close():129] Closing mailbox, abandoning 1 handles.

+2025-04-01 11:37:14,195 INFO    MainThread:618398 [wandb_setup.py:_flush():67] Current SDK version is 0.19.8
+2025-04-01 11:37:14,196 INFO    MainThread:618398 [wandb_setup.py:_flush():67] Configure stats pid to 618398
+2025-04-01 11:37:14,196 INFO    MainThread:618398 [wandb_setup.py:_flush():67] Loading settings from /home/yangyaodong/.config/wandb/settings
+2025-04-01 11:37:14,196 INFO    MainThread:618398 [wandb_setup.py:_flush():67] Loading settings from /aifs4su/yaodong/hantao/align-anything/scripts/wandb/settings
+2025-04-01 11:37:14,196 INFO    MainThread:618398 [wandb_setup.py:_flush():67] Loading settings from environment variables
+2025-04-01 11:37:14,196 INFO    MainThread:618398 [wandb_init.py:setup_run_log_directory():647] Logging user logs to ../outputs/llava_1.6_vicuna_7B_cosi/top1-40/wandb/run-20250401_113714-csgd5etr/logs/debug.log
+2025-04-01 11:37:14,196 INFO    MainThread:618398 [wandb_init.py:setup_run_log_directory():648] Logging internal logs to ../outputs/llava_1.6_vicuna_7B_cosi/top1-40/wandb/run-20250401_113714-csgd5etr/logs/debug-internal.log
+2025-04-01 11:37:14,196 INFO    MainThread:618398 [wandb_init.py:init():761] calling init triggers
+2025-04-01 11:37:14,196 INFO    MainThread:618398 [wandb_init.py:init():766] wandb.init called with sweep_config: {}
+config: {'train_cfgs': {'save_checkpoint': False, 'load_checkpoint': False, 'ds_cfgs': 'ds_z3_config.json', 'epochs': 3, 'seed': 42, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 1, 'gradient_checkpointing': True, 'learning_rate': 1e-06, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.0, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': False, 'freeze_vision_tower': True, 'freeze_language_model': False}, 'data_cfgs': {'train_datasets': '/aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-40', 'train_template': 'AA_TI2T_LLAVA', 'train_size': {}, 'train_split': 'train', 'train_name': 'text-image-to-text', 'train_data_files': {}, 'train_optional_args': [], 'eval_datasets': {}, 'eval_template': {}, 'eval_size': {}, 'eval_split': {}, 'eval_subset': {}, 'eval_data_files': {}, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '../outputs/llava_1.6_vicuna_7B_cosi/top1-40', 'cache_dir': {}, 'save_total_limit': 3}, 'model_cfgs': {'model_name_or_path': '/aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': {}, '_wandb': {}}
+2025-04-01 11:37:14,196 INFO    MainThread:618398 [wandb_init.py:init():784] starting backend
+2025-04-01 11:37:14,196 INFO    MainThread:618398 [wandb_init.py:init():788] sending inform_init request
+2025-04-01 11:37:14,205 INFO    MainThread:618398 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-04-01 11:37:14,205 INFO    MainThread:618398 [wandb_init.py:init():798] backend started and connected
+2025-04-01 11:37:14,206 INFO    MainThread:618398 [wandb_init.py:init():891] updated telemetry
+2025-04-01 11:37:14,230 INFO    MainThread:618398 [wandb_init.py:init():915] communicating run to backend with 90.0 second timeout
+2025-04-01 11:37:14,902 INFO    MainThread:618398 [wandb_init.py:init():990] starting run threads in backend
+2025-04-01 11:37:15,308 INFO    MainThread:618398 [wandb_run.py:_console_start():2375] atexit reg
+2025-04-01 11:37:15,308 INFO    MainThread:618398 [wandb_run.py:_redirect():2227] redirect: wrap_raw
+2025-04-01 11:37:15,308 INFO    MainThread:618398 [wandb_run.py:_redirect():2292] Wrapping output streams.
+2025-04-01 11:37:15,308 INFO    MainThread:618398 [wandb_run.py:_redirect():2315] Redirects installed.
+2025-04-01 11:37:15,314 INFO    MainThread:618398 [wandb_init.py:init():1032] run started, returning control to user process
+2025-04-01 15:16:09,379 INFO    MainThread:618398 [wandb_run.py:_finish():2112] finishing run htlou/align-anything/csgd5etr
+2025-04-01 15:16:09,380 INFO    MainThread:618398 [wandb_run.py:_atexit_cleanup():2340] got exitcode: 0
+2025-04-01 15:16:09,381 INFO    MainThread:618398 [wandb_run.py:_restore():2322] restore
+2025-04-01 15:16:09,381 INFO    MainThread:618398 [wandb_run.py:_restore():2328] restore done
+2025-04-01 15:16:10,382 INFO    MainThread:618398 [wandb_run.py:_restore():2322] restore
+2025-04-01 15:16:10,382 INFO    MainThread:618398 [wandb_run.py:_restore():2328] restore done
+2025-04-01 15:16:10,382 ERROR   MainThread:618398 [wandb_run.py:_atexit_cleanup():2361] Problem finishing run
 Traceback (most recent call last):
   File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2352, in _atexit_cleanup
     self._on_finish()
   File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/concurrent/futures/thread.py", line 169, in submit
     raise RuntimeError('cannot schedule new futures after '
 RuntimeError: cannot schedule new futures after interpreter shutdown
+2025-04-01 15:16:10,873 INFO    MsgRouterThr:618398 [mailbox.py:close():129] Closing mailbox, abandoning 2 handles.

wandb/run-20250401_113714-csgd5etr/files/config.yaml ADDED Viewed

	@@ -0,0 +1,95 @@

+_wandb:
+    value:
+        cli_version: 0.19.8
+        m: []
+        python_version: 3.11.11
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 55
+                - 63
+                - 71
+                - 83
+                - 98
+                - 105
+            "2":
+                - 1
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 55
+                - 63
+                - 71
+                - 83
+                - 98
+                - 105
+            "3":
+                - 2
+                - 13
+                - 16
+                - 23
+                - 55
+                - 61
+            "4": 3.11.11
+            "5": 0.19.8
+            "6": 4.50.0
+            "8":
+                - 5
+            "12": 0.19.8
+            "13": linux-x86_64
+data_cfgs:
+    value:
+        eval_optional_args: []
+        train_datasets: /aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-40
+        train_name: text-image-to-text
+        train_optional_args: []
+        train_split: train
+        train_template: AA_TI2T_LLAVA
+logger_cfgs:
+    value:
+        log_project: align-anything
+        log_run_name: dpo
+        log_type: wandb
+        output_dir: ../outputs/llava_1.6_vicuna_7B_cosi/top1-40
+        save_total_limit: 3
+model_cfgs:
+    value:
+        model_max_length: 4096
+        model_name_or_path: /aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf
+        trust_remote_code: true
+train_cfgs:
+    value:
+        adam_betas:
+            - 0.9
+            - 0.95
+        bf16: true
+        ds_cfgs: ds_z3_config.json
+        epochs: 3
+        eval_interval: 10
+        eval_strategy: epoch
+        fp16: false
+        freeze_language_model: false
+        freeze_mm_proj: false
+        freeze_vision_tower: true
+        gradient_accumulation_steps: 1
+        gradient_checkpointing: true
+        learning_rate: 1e-06
+        load_checkpoint: false
+        lr_scheduler_type: cosine
+        lr_warmup_ratio: 0.03
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        regularization: 0.001
+        save_checkpoint: false
+        scale_coeff: 0.1
+        seed: 42
+        weight_decay: 0

wandb/run-20250401_113714-csgd5etr/files/output.log ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20250401_113714-csgd5etr/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,258 @@

+maskrcnn_benchmark==0.0.0
+webdataset==0.2.111
+websockets==15.0.1
+typer==0.15.2
+blobfile==3.0.0
+pooch==1.8.2
+filelock==3.18.0
+referencing==0.36.2
+matplotlib==3.10.1
+cachetools==5.5.2
+python-dateutil==2.9.0.post0
+gmpy2==2.2.1
+uvloop==0.21.0
+nvidia-cusparselt-cu12==0.6.2
+clip==0.2.0
+httpcore==1.0.7
+charset-normalizer==3.3.2
+torchlibrosa==0.1.0
+contourpy==1.3.1
+multiprocess==0.70.16
+nest-asyncio==1.6.0
+Werkzeug==3.1.3
+aiofiles==23.2.1
+six==1.17.0
+torch==2.6.0
+sse-starlette==2.2.1
+typing_extensions==4.12.2
+xgrammar==0.1.16
+psutil==7.0.0
+kiwisolver==1.4.8
+moviepy==2.1.2
+frozenlist==1.5.0
+jiter==0.9.0
+einops==0.8.1
+flash_attn==2.7.4.post1
+PySocks==1.7.1
+regex==2024.11.6
+markdown-it-py==3.0.0
+ruff==0.11.2
+docker-pycreds==0.4.0
+nvidia-nvtx-cu12==12.4.127
+pyparsing==3.2.3
+resampy==0.4.3
+tokenizers==0.21.0
+frechet-audio-distance==0.1.2
+aiohappyeyeballs==2.6.1
+llamafactory==0.9.3.dev0
+msgspec==0.19.0
+httpx==0.28.1
+encodec==0.1.1
+ffmpy==0.5.0
+jsonschema==4.23.0
+imageio-ffmpeg==0.6.0
+mkl_random==1.2.8
+fairscale==0.4.13
+soxr==0.5.0.post1
+lark==1.2.2
+gradio==5.21.0
+absl-py==2.2.1
+dnspython==2.7.0
+networkx==3.4.2
+h5py==3.13.0
+hjson==3.1.0
+tensorboard==2.19.0
+aiosignal==1.3.2
+pip==25.0
+nvidia-cublas-cu12==12.4.5.8
+llguidance==0.7.11
+zipp==3.21.0
+ftfy==6.3.1
+peft==0.15.0
+attrs==25.3.0
+trl==0.9.6
+requests==2.32.3
+progressbar==2.5
+sniffio==1.3.1
+pycountry==24.6.1
+lxml==5.3.1
+starlette==0.46.1
+pytest==7.2.0
+Markdown==3.7
+mdurl==0.1.2
+pyzmq==26.3.0
+safetensors==0.5.3
+opencv-python==4.6.0.66
+prometheus-fastapi-instrumentator==7.1.0
+shellingham==1.5.4
+torchvision==0.21.0
+pluggy==1.5.0
+timm==1.0.15
+multidict==6.2.0
+semantic-version==2.10.0
+airportsdata==20250224
+numba==0.60.0
+MarkupSafe==2.1.5
+pydantic_core==2.33.0
+imageio==2.37.0
+nvidia-nccl-cu12==2.21.5
+dill==0.3.8
+msgpack==1.1.0
+sentry-sdk==2.24.1
+rpds-py==0.24.0
+grpcio==1.71.0
+fastrlock==0.8.3
+python-json-logger==3.3.0
+cffi==1.17.1
+gradio_client==1.7.2
+PyYAML==6.0.2
+tensorboard-data-server==0.7.2
+termcolor==2.5.0
+torchaudio==2.6.0
+triton==3.2.0
+fastapi==0.115.12
+clint==0.5.1
+lazy_loader==0.4
+depyf==0.18.0
+mkl_fft==1.3.11
+annotated-types==0.7.0
+scikit-learn==1.6.1
+wget==3.2
+setuptools==75.8.0
+args==0.1.0
+certifi==2025.1.31
+click==8.1.8
+python-dotenv==1.1.0
+laion_clap==1.1.5
+Pygments==2.19.1
+tomlkit==0.13.2
+idna==3.7
+propcache==0.3.1
+platformdirs==4.3.7
+align-anything==0.0.1.dev0
+ray==2.44.1
+cloudpickle==3.1.1
+deepspeed==0.16.5
+smmap==5.0.2
+distro==1.9.0
+fonttools==4.56.0
+typing-inspection==0.4.0
+braceexpand==0.1.7
+decorator==5.2.1
+diskcache==5.6.3
+yt-dlp==2025.3.27
+shtab==1.7.1
+gguf==0.10.0
+interegular==0.3.3
+compressed-tensors==0.9.2
+pandas==2.2.3
+huggingface-hub==0.29.3
+pyarrow==19.0.1
+lm-format-enforcer==0.10.11
+GitPython==3.1.44
+xxhash==3.5.0
+packaging==24.2
+setproctitle==1.3.5
+llvmlite==0.43.0
+tiktoken==0.9.0
+mpmath==1.3.0
+email_validator==2.2.0
+nvidia-ml-py==12.570.86
+pydantic==2.11.0
+xformers==0.0.29.post2
+httptools==0.6.4
+librosa==0.11.0
+pytorch-fid==0.3.0
+hpsv2==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+iniconfig==2.1.0
+sympy==1.13.1
+safehttpx==0.1.6
+jsonschema-specifications==2024.10.1
+Jinja2==3.1.6
+tyro==0.8.14
+h11==0.14.0
+aiohttp==3.11.14
+diffusers==0.32.2
+tqdm==4.67.1
+blake3==1.0.4
+vllm==0.8.2
+scipy==1.10.1
+audioread==3.0.1
+proglog==0.1.10
+fire==0.7.0
+sentencepiece==0.2.0
+pytz==2025.2
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-curand-cu12==10.3.5.147
+numpy==1.26.4
+tzdata==2025.2
+python-multipart==0.0.20
+urllib3==2.3.0
+pycryptodomex==3.22.0
+yarl==1.18.3
+outlines==0.1.11
+nvidia-cusolver-cu12==11.6.1.9
+pydub==0.25.1
+mistral_common==1.5.4
+pycparser==2.22
+pytest-split==0.8.0
+datasets==3.4.1
+soundfile==0.13.1
+transformers==4.50.0
+image-reward==1.5
+wcwidth==0.2.13
+nvidia-cuda-nvrtc-cu12==12.4.127
+groovy==0.1.2
+nvidia-cuda-runtime-cu12==12.4.127
+astor==0.8.1
+anyio==4.9.0
+wandb==0.19.8
+joblib==1.4.2
+fsspec==2024.12.0
+accelerate==1.5.2
+py-cpuinfo==9.0.0
+docstring_parser==0.16
+partial-json-parser==0.2.1.1.post5
+nvidia-cusparse-cu12==12.3.1.170
+protobuf==3.20.3
+outlines_core==0.1.26
+nvidia-cufft-cu12==11.2.1.3
+cycler==0.12.1
+uvicorn==0.34.0
+orjson==3.10.16
+av==14.2.0
+Brotli==1.0.9
+cupy-cuda12x==13.4.1
+openai==1.69.0
+rich==13.9.4
+importlib_metadata==8.6.1
+ninja==1.11.1.4
+wheel==0.45.1
+pillow==10.4.0
+prometheus_client==0.21.1
+threadpoolctl==3.6.0
+gitdb==4.0.12
+watchfiles==1.0.4
+nvidia-cuda-cupti-cu12==12.4.127
+opencv-python-headless==4.11.0.86
+mkl-service==2.4.0
+rich-toolkit==0.14.0
+fastapi-cli==0.0.7
+llamafactory==0.9.3.dev0
+typing_extensions==4.12.2
+tomli==2.0.1
+zipp==3.19.2
+wheel==0.43.0
+jaraco.text==3.12.1
+packaging==24.2
+autocommand==2.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+platformdirs==4.2.2
+more-itertools==10.3.0
+inflect==7.3.1
+jaraco.context==5.3.0
+typeguard==4.3.0
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0

wandb/run-20250401_113714-csgd5etr/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,106 @@

+{
+  "os": "Linux-5.15.0-1040-nvidia-x86_64-with-glibc2.35",
+  "python": "CPython 3.11.11",
+  "startedAt": "2025-04-01T03:37:14.205786Z",
+  "args": [
+    "--local_rank=0",
+    "--model_name_or_path",
+    "/aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf",
+    "--train_datasets",
+    "/aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-40",
+    "--train_template",
+    "AA_TI2T_LLAVA",
+    "--train_split",
+    "train",
+    "--train_name",
+    "text-image-to-text",
+    "--output_dir",
+    "../outputs/llava_1.6_vicuna_7B_cosi/top1-40",
+    "--save_total_limit",
+    "3",
+    "--train_batch_size",
+    "8",
+    "--epochs",
+    "3"
+  ],
+  "program": "-m align_anything.trainers.text_image_to_text.dpo",
+  "git": {
+    "remote": "git@github.com-hantao:PKU-Alignment/align-anything.git",
+    "commit": "106588f9802757a3283c1aff1f33ea9afd737f31"
+  },
+  "email": "2200017789@stu.pku.edu.cn",
+  "root": "../outputs/llava_1.6_vicuna_7B_cosi/top1-40",
+  "host": "dgx-075",
+  "executable": "/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/python",
+  "cpu_count": 112,
+  "cpu_count_logical": 224,
+  "gpu": "NVIDIA H800",
+  "gpu_count": 8,
+  "disk": {
+    "/": {
+      "total": "1888556142592",
+      "used": "44054196224"
+    }
+  },
+  "memory": {
+    "total": "2164195545088"
+  },
+  "cpu": {
+    "count": 112,
+    "countLogical": 224
+  },
+  "gpu_nvidia": [
+    {
+      "name": "NVIDIA H800",
+      "memoryTotal": "85520809984",
+      "cudaCores": 16896,
+      "architecture": "Hopper"
+    },
+    {
+      "name": "NVIDIA H800",
+      "memoryTotal": "85520809984",
+      "cudaCores": 16896,
+      "architecture": "Hopper"
+    },
+    {
+      "name": "NVIDIA H800",
+      "memoryTotal": "85520809984",
+      "cudaCores": 16896,
+      "architecture": "Hopper"
+    },
+    {
+      "name": "NVIDIA H800",
+      "memoryTotal": "85520809984",
+      "cudaCores": 16896,
+      "architecture": "Hopper"
+    },
+    {
+      "name": "NVIDIA H800",
+      "memoryTotal": "85520809984",
+      "cudaCores": 16896,
+      "architecture": "Hopper"
+    },
+    {
+      "name": "NVIDIA H800",
+      "memoryTotal": "85520809984",
+      "cudaCores": 16896,
+      "architecture": "Hopper"
+    },
+    {
+      "name": "NVIDIA H800",
+      "memoryTotal": "85520809984",
+      "cudaCores": 16896,
+      "architecture": "Hopper"
+    },
+    {
+      "name": "NVIDIA H800",
+      "memoryTotal": "85520809984",
+      "cudaCores": 16896,
+      "architecture": "Hopper"
+    }
+  ],
+  "slurm": {
+    "conf": "/cm/shared/apps/slurm/var/etc/slurm/slurm.conf"
+  },
+  "cudaVersion": "12.2"
+}

wandb/run-20250401_113714-csgd5etr/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"train/better_sample_reward":-0.0546875,"train/reward":-14.1875,"_timestamp":1.74349172676146e+09,"_step":5835,"train/loss":0.00016117095947265625,"_wandb":{"runtime":13135},"_runtime":13135.208230328,"train/lr":0,"train/epoch":3,"train/step":5835,"train/reward_accuracy":1,"train/reward_margin":14,"train/worse_sample_reward":-14.125}

wandb/run-20250401_113714-csgd5etr/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-04-01T11:37:13.56798912+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpbbegb76s/port-618398.txt","pid":618398,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
+{"time":"2025-04-01T11:37:13.568984401+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":618398}
+{"time":"2025-04-01T11:37:13.568976592+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37095,"Zone":""}}
+{"time":"2025-04-01T11:37:13.750019106+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:61204"}
+{"time":"2025-04-01T11:37:14.207008815+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"csgd5etr","id":"127.0.0.1:61204"}
+{"time":"2025-04-01T11:37:14.524808389+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"csgd5etr","id":"127.0.0.1:61204"}
+{"time":"2025-04-01T15:16:10.873656844+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:61204"}
+{"time":"2025-04-01T15:16:10.873839315+08:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:61204"}
+{"time":"2025-04-01T15:16:10.873912438+08:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:61204"}
+{"time":"2025-04-01T15:16:10.873923718+08:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-04-01T15:16:11.763894135+08:00","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:37095->127.0.0.1:61204: use of closed network connection","id":"127.0.0.1:61204"}
+{"time":"2025-04-01T15:16:11.764226632+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:61204"}
+{"time":"2025-04-01T15:16:11.764237354+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:61204"}
+{"time":"2025-04-01T15:16:11.764243791+08:00","level":"INFO","msg":"server is closed"}

wandb/run-20250401_113714-csgd5etr/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,16 @@

+{"time":"2025-04-01T11:37:14.208599931+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"../outputs/llava_1.6_vicuna_7B_cosi/top1-40/wandb/run-20250401_113714-csgd5etr/logs/debug-core.log"}
+{"time":"2025-04-01T11:37:14.524748736+08:00","level":"INFO","msg":"created new stream","id":"csgd5etr"}
+{"time":"2025-04-01T11:37:14.524801572+08:00","level":"INFO","msg":"stream: started","id":"csgd5etr"}
+{"time":"2025-04-01T11:37:14.524825793+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"csgd5etr"}
+{"time":"2025-04-01T11:37:14.524831464+08:00","level":"INFO","msg":"handler: started","stream_id":"csgd5etr"}
+{"time":"2025-04-01T11:37:14.524868464+08:00","level":"INFO","msg":"sender: started","stream_id":"csgd5etr"}
+{"time":"2025-04-01T11:37:14.908466405+08:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-04-01T15:16:09.414058626+08:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-04-01T15:16:09.421777269+08:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-04-01T15:16:10.873890429+08:00","level":"INFO","msg":"stream: closing","id":"csgd5etr"}
+{"time":"2025-04-01T15:16:10.873923324+08:00","level":"WARN","msg":"sender: received Exit record more than once, ignoring"}
+{"time":"2025-04-01T15:16:11.529727104+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-04-01T15:16:11.763778385+08:00","level":"INFO","msg":"handler: closed","stream_id":"csgd5etr"}
+{"time":"2025-04-01T15:16:11.763804921+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"csgd5etr"}
+{"time":"2025-04-01T15:16:11.763934707+08:00","level":"INFO","msg":"sender: closed","stream_id":"csgd5etr"}
+{"time":"2025-04-01T15:16:11.76416327+08:00","level":"INFO","msg":"stream: closed","id":"csgd5etr"}