Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- arguments.yaml +11 -13
- environ.txt +32 -63
- script.sh +28 -26
- slice_1946/added_tokens.json +3 -0
- slice_1946/chat_template.json +3 -0
- slice_1946/config.json +89 -0
- slice_1946/preprocessor_config.json +52 -0
- slice_1946/processor_config.json +7 -0
- slice_1946/pytorch_model.bin +3 -0
- slice_1946/special_tokens_map.json +31 -0
- slice_1946/tokenizer.json +0 -0
- slice_1946/tokenizer.model +3 -0
- slice_1946/tokenizer_config.json +56 -0
- slice_2919/added_tokens.json +3 -0
- slice_2919/chat_template.json +3 -0
- slice_2919/config.json +89 -0
- slice_2919/preprocessor_config.json +52 -0
- slice_2919/processor_config.json +7 -0
- slice_2919/pytorch_model.bin +3 -0
- slice_2919/special_tokens_map.json +31 -0
- slice_2919/tokenizer.json +0 -0
- slice_2919/tokenizer.model +3 -0
- slice_2919/tokenizer_config.json +56 -0
- slice_973/added_tokens.json +3 -0
- slice_973/chat_template.json +3 -0
- slice_973/config.json +89 -0
- slice_973/preprocessor_config.json +52 -0
- slice_973/processor_config.json +7 -0
- slice_973/pytorch_model.bin +3 -0
- slice_973/special_tokens_map.json +31 -0
- slice_973/tokenizer.json +0 -0
- slice_973/tokenizer.model +3 -0
- slice_973/tokenizer_config.json +56 -0
- slice_end/added_tokens.json +1 -2
- slice_end/config.json +13 -9
- slice_end/pytorch_model.bin +2 -2
- slice_end/special_tokens_map.json +1 -1
- slice_end/tokenizer.json +0 -0
- slice_end/tokenizer.model +2 -2
- slice_end/tokenizer_config.json +5 -18
- wandb/debug-internal.log +16 -18
- wandb/debug.log +30 -30
- wandb/run-20250401_092824-bd2o51v8/files/config.yaml +95 -0
- wandb/run-20250401_092824-bd2o51v8/files/output.log +0 -0
- wandb/run-20250401_092824-bd2o51v8/files/requirements.txt +258 -0
- wandb/run-20250401_092824-bd2o51v8/files/wandb-metadata.json +106 -0
- wandb/run-20250401_092824-bd2o51v8/files/wandb-summary.json +1 -0
- wandb/run-20250401_092824-bd2o51v8/logs/debug-core.log +14 -0
- wandb/run-20250401_092824-bd2o51v8/logs/debug-internal.log +16 -0
.gitattributes
CHANGED
|
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
wandb/run-20250331_174515-0lcg01si/run-0lcg01si.wandb filter=lfs diff=lfs merge=lfs -text
|
| 37 |
wandb/run-20250329_013436-woy6qs03/run-woy6qs03.wandb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
wandb/run-20250331_174515-0lcg01si/run-0lcg01si.wandb filter=lfs diff=lfs merge=lfs -text
|
| 37 |
wandb/run-20250329_013436-woy6qs03/run-woy6qs03.wandb filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
wandb/run-20250401_092824-bd2o51v8/run-bd2o51v8.wandb filter=lfs diff=lfs merge=lfs -text
|
arguments.yaml
CHANGED
|
@@ -1,37 +1,34 @@
|
|
| 1 |
data_cfgs:
|
| 2 |
eval_data_files: {}
|
| 3 |
eval_datasets: {}
|
| 4 |
-
eval_name: {}
|
| 5 |
eval_optional_args: []
|
| 6 |
eval_size: {}
|
| 7 |
eval_split: {}
|
| 8 |
eval_subset: {}
|
| 9 |
eval_template: {}
|
| 10 |
-
load_multi_datasets: false
|
| 11 |
train_data_files: {}
|
| 12 |
-
train_datasets: /aifs4su/yaodong/hantao/datasets/
|
| 13 |
train_name: text-image-to-text
|
| 14 |
train_optional_args: []
|
| 15 |
train_size: {}
|
| 16 |
train_split: train
|
| 17 |
-
train_template:
|
| 18 |
logger_cfgs:
|
| 19 |
cache_dir: {}
|
| 20 |
log_project: align-anything
|
| 21 |
-
log_run_name:
|
| 22 |
log_type: wandb
|
| 23 |
-
output_dir: ../outputs/
|
| 24 |
-
save_total_limit:
|
| 25 |
model_cfgs:
|
| 26 |
-
model_max_length:
|
| 27 |
-
model_name_or_path: /aifs4su/yaodong/hantao/models/llava-v1.6-
|
| 28 |
trust_remote_code: true
|
| 29 |
special_tokens: {}
|
| 30 |
train_cfgs:
|
| 31 |
adam_betas:
|
| 32 |
- 0.9
|
| 33 |
- 0.95
|
| 34 |
-
adam_epsilon: 1.0e-08
|
| 35 |
bf16: true
|
| 36 |
ds_cfgs: ds_z3_config.json
|
| 37 |
epochs: 3
|
|
@@ -41,15 +38,16 @@ train_cfgs:
|
|
| 41 |
freeze_language_model: false
|
| 42 |
freeze_mm_proj: false
|
| 43 |
freeze_vision_tower: true
|
| 44 |
-
gradient_accumulation_steps:
|
| 45 |
gradient_checkpointing: true
|
| 46 |
-
learning_rate:
|
| 47 |
load_checkpoint: false
|
| 48 |
lr_scheduler_type: cosine
|
| 49 |
lr_warmup_ratio: 0.03
|
| 50 |
-
max_grad_norm: 1.0
|
| 51 |
per_device_eval_batch_size: 1
|
| 52 |
per_device_train_batch_size: 1
|
|
|
|
| 53 |
save_checkpoint: false
|
|
|
|
| 54 |
seed: 42
|
| 55 |
weight_decay: 0.0
|
|
|
|
| 1 |
data_cfgs:
|
| 2 |
eval_data_files: {}
|
| 3 |
eval_datasets: {}
|
|
|
|
| 4 |
eval_optional_args: []
|
| 5 |
eval_size: {}
|
| 6 |
eval_split: {}
|
| 7 |
eval_subset: {}
|
| 8 |
eval_template: {}
|
|
|
|
| 9 |
train_data_files: {}
|
| 10 |
+
train_datasets: /aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-20
|
| 11 |
train_name: text-image-to-text
|
| 12 |
train_optional_args: []
|
| 13 |
train_size: {}
|
| 14 |
train_split: train
|
| 15 |
+
train_template: AA_TI2T_LLAVA
|
| 16 |
logger_cfgs:
|
| 17 |
cache_dir: {}
|
| 18 |
log_project: align-anything
|
| 19 |
+
log_run_name: dpo
|
| 20 |
log_type: wandb
|
| 21 |
+
output_dir: ../outputs/llava_1.6_vicuna_7B_cosi/top1-20
|
| 22 |
+
save_total_limit: 3
|
| 23 |
model_cfgs:
|
| 24 |
+
model_max_length: 4096
|
| 25 |
+
model_name_or_path: /aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf
|
| 26 |
trust_remote_code: true
|
| 27 |
special_tokens: {}
|
| 28 |
train_cfgs:
|
| 29 |
adam_betas:
|
| 30 |
- 0.9
|
| 31 |
- 0.95
|
|
|
|
| 32 |
bf16: true
|
| 33 |
ds_cfgs: ds_z3_config.json
|
| 34 |
epochs: 3
|
|
|
|
| 38 |
freeze_language_model: false
|
| 39 |
freeze_mm_proj: false
|
| 40 |
freeze_vision_tower: true
|
| 41 |
+
gradient_accumulation_steps: 1
|
| 42 |
gradient_checkpointing: true
|
| 43 |
+
learning_rate: 1.0e-06
|
| 44 |
load_checkpoint: false
|
| 45 |
lr_scheduler_type: cosine
|
| 46 |
lr_warmup_ratio: 0.03
|
|
|
|
| 47 |
per_device_eval_batch_size: 1
|
| 48 |
per_device_train_batch_size: 1
|
| 49 |
+
regularization: 0.001
|
| 50 |
save_checkpoint: false
|
| 51 |
+
scale_coeff: 0.1
|
| 52 |
seed: 42
|
| 53 |
weight_decay: 0.0
|
environ.txt
CHANGED
|
@@ -81,84 +81,48 @@ BASH_FUNC_switchml%%=() { typeset swfound=1;
|
|
| 81 |
return 1;
|
| 82 |
fi
|
| 83 |
}
|
|
|
|
| 84 |
BUILD=x86_64-conda-linux-gnu
|
|
|
|
| 85 |
CC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
|
| 86 |
CC_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
|
| 87 |
-
CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/
|
| 88 |
CMAKE_ARGS=-DCMAKE_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ar -DCMAKE_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib -DCMAKE_LINKER=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld -DCMAKE_STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip -DCMAKE_BUILD_TYPE=Release
|
| 89 |
CMAKE_PREFIX_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama:/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot/usr
|
| 90 |
CMD_WLM_CLUSTER_NAME=slurm
|
| 91 |
-
|
| 92 |
-
CONDA_BACKUP_AR=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-ar
|
| 93 |
-
CONDA_BACKUP_AS=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-as
|
| 94 |
-
CONDA_BACKUP_BUILD=x86_64-conda-linux-gnu
|
| 95 |
-
CONDA_BACKUP_CC=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-cc
|
| 96 |
-
CONDA_BACKUP_CC_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-cc
|
| 97 |
-
CONDA_BACKUP_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
|
| 98 |
-
CONDA_BACKUP_CMAKE_PREFIX_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_anything:/aifs4su/yaodong/miniconda3/envs/hantao_anything/x86_64-conda-linux-gnu/sysroot/usr
|
| 99 |
-
CONDA_BACKUP_CONDA_BUILD_SYSROOT=/aifs4su/yaodong/miniconda3/envs/hantao_anything/x86_64-conda-linux-gnu/sysroot
|
| 100 |
-
CONDA_BACKUP_CPP=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-cpp
|
| 101 |
-
CONDA_BACKUP_CPPFLAGS=-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
|
| 102 |
-
CONDA_BACKUP_CXX=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-c++
|
| 103 |
-
CONDA_BACKUP_CXXFILT=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-c++filt
|
| 104 |
-
CONDA_BACKUP_CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
|
| 105 |
-
CONDA_BACKUP_CXX_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-c++
|
| 106 |
-
CONDA_BACKUP_DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
|
| 107 |
-
CONDA_BACKUP_DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
|
| 108 |
-
CONDA_BACKUP_DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
|
| 109 |
-
CONDA_BACKUP_ELFEDIT=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-elfedit
|
| 110 |
-
CONDA_BACKUP_GCC=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-gcc
|
| 111 |
-
CONDA_BACKUP_GCC_AR=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-gcc-ar
|
| 112 |
-
CONDA_BACKUP_GCC_NM=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-gcc-nm
|
| 113 |
-
CONDA_BACKUP_GCC_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-gcc-ranlib
|
| 114 |
-
CONDA_BACKUP_GPROF=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-gprof
|
| 115 |
-
CONDA_BACKUP_GXX=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-g++
|
| 116 |
-
CONDA_BACKUP_HOST=x86_64-conda-linux-gnu
|
| 117 |
-
CONDA_BACKUP_LD=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-ld
|
| 118 |
-
CONDA_BACKUP_LDFLAGS=-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,-rpath,/aifs4su/yaodong/miniconda3/envs/hantao_anything/lib -Wl,-rpath-link,/aifs4su/yaodong/miniconda3/envs/hantao_anything/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_anything/lib
|
| 119 |
-
CONDA_BACKUP_LD_GOLD=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-ld.gold
|
| 120 |
-
CONDA_BACKUP_NM=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-nm
|
| 121 |
-
CONDA_BACKUP_OBJCOPY=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-objcopy
|
| 122 |
-
CONDA_BACKUP_OBJDUMP=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-objdump
|
| 123 |
-
CONDA_BACKUP_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-ranlib
|
| 124 |
-
CONDA_BACKUP_READELF=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-readelf
|
| 125 |
-
CONDA_BACKUP_SIZE=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-size
|
| 126 |
-
CONDA_BACKUP_STRINGS=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-strings
|
| 127 |
-
CONDA_BACKUP_STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-strip
|
| 128 |
-
CONDA_BACKUP__CONDA_PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata_x86_64_conda_cos7_linux_gnu
|
| 129 |
-
CONDA_BACKUP_build_alias=x86_64-conda-linux-gnu
|
| 130 |
-
CONDA_BACKUP_host_alias=x86_64-conda-linux-gnu
|
| 131 |
CONDA_BUILD_SYSROOT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot
|
| 132 |
CONDA_DEFAULT_ENV=hantao_llama
|
| 133 |
CONDA_EXE=/aifs4su/yaodong/miniconda3/bin/conda
|
| 134 |
CONDA_PREFIX=/aifs4su/yaodong/miniconda3/envs/hantao_llama
|
| 135 |
CONDA_PREFIX_1=/aifs4su/yaodong/miniconda3
|
| 136 |
-
CONDA_PREFIX_2=/aifs4su/yaodong/miniconda3/envs/hantao_anything
|
| 137 |
CONDA_PROMPT_MODIFIER=(hantao_llama)
|
| 138 |
CONDA_PYTHON_EXE=/aifs4su/yaodong/miniconda3/bin/python
|
| 139 |
-
CONDA_SHLVL=
|
| 140 |
CPATH=/cm/shared/apps/slurm/current/include
|
| 141 |
CPATH_modshare=/cm/shared/apps/slurm/current/include:1
|
| 142 |
CPP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cpp
|
| 143 |
-
CPPFLAGS=-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /aifs4su/yaodong/miniconda3/envs/
|
| 144 |
CROSS_RANK=0
|
| 145 |
CROSS_SIZE=1
|
| 146 |
CUDA_MODULE_LOADING=LAZY
|
| 147 |
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
| 148 |
CXX=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++
|
| 149 |
CXXFILT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++filt
|
| 150 |
-
CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/
|
| 151 |
CXX_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++
|
| 152 |
DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1028/bus
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
|
|
|
| 156 |
ELFEDIT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-elfedit
|
| 157 |
ENABLE_LMOD=0
|
| 158 |
GCC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc
|
| 159 |
GCC_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ar
|
| 160 |
GCC_NM=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-nm
|
| 161 |
GCC_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ranlib
|
|
|
|
| 162 |
GPROF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gprof
|
| 163 |
GSETTINGS_SCHEMA_DIR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/share/glib-2.0/schemas
|
| 164 |
GSETTINGS_SCHEMA_DIR_CONDA_BACKUP=
|
|
@@ -172,7 +136,7 @@ KMP_DUPLICATE_LIB_OK=True
|
|
| 172 |
KMP_INIT_AT_FORK=FALSE
|
| 173 |
LANG=C.UTF-8
|
| 174 |
LD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld
|
| 175 |
-
LDFLAGS=-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,-rpath,/aifs4su/yaodong/miniconda3/envs/
|
| 176 |
LD_GOLD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld.gold
|
| 177 |
LD_LIBRARY_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/cv2/../../lib64:/usr/mpi/gcc/openmpi-4.1.7a1/lib:/cm/shared/apps/slurm/current/lib64/slurm:/cm/shared/apps/slurm/current/lib64
|
| 178 |
LD_LIBRARY_PATH_modshare=/cm/shared/apps/slurm/current/lib64:1:/usr/mpi/gcc/openmpi-4.1.7a1/lib:1:/cm/shared/apps/slurm/current/lib64/slurm:1
|
|
@@ -192,7 +156,7 @@ LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd
|
|
| 192 |
MANPATH=/usr/mpi/gcc/openmpi-4.1.7a1/share/man:/cm/shared/apps/slurm/current/man:/cm/local/apps/environment-modules/4.5.3/share/man:/usr/local/man:/usr/local/share/man:/usr/share/man:/cm/local/apps/environment-modules/current/share/man:/cm/local/apps/environment-modules/current/share/man
|
| 193 |
MANPATH_modshare=/usr/local/share/man:1:/usr/mpi/gcc/openmpi-4.1.7a1/share/man:1:/cm/local/apps/environment-modules/current/share/man:1:/cm/local/apps/environment-modules/4.5.3/share/man:1:/usr/local/man:1:/usr/share/man:1:/cm/shared/apps/slurm/current/man:1
|
| 194 |
MASTER_ADDR=127.0.0.1
|
| 195 |
-
MASTER_PORT=
|
| 196 |
MIG_PARTED_CHECKPOINT_FILE=/var/lib/nvidia-mig-manager/checkpoint.json
|
| 197 |
MIG_PARTED_CONFIG_FILE=/etc/nvidia-mig-manager/config.yaml
|
| 198 |
MIG_PARTED_HOOKS_FILE=/etc/nvidia-mig-manager/hooks.yaml
|
|
@@ -209,39 +173,44 @@ NVCC_PREPEND_FLAGS_BACKUP= -ccbin=/aifs4su/yaodong/miniconda3/bin/x86_64-conda-l
|
|
| 209 |
NVITOP_MONITOR_MODE=colorful
|
| 210 |
OBJCOPY=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objcopy
|
| 211 |
OBJDUMP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objdump
|
| 212 |
-
OLDPWD=/
|
| 213 |
-
PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin:/
|
| 214 |
-
PATH_modshare=/usr/mpi/gcc/openmpi-4.1.7a1/bin:1:/opt/bin/:1:/usr/bin:1:/usr/local/bin:1:/cm/shared/apps/slurm/current/bin:1:/cm/shared/apps/slurm/current/sbin:1:/bin:1:/snap/bin:1:/sbin:1:/usr/sbin:1:/cm/local/apps/environment-modules/4.5.3/bin:1:/usr/
|
| 215 |
PWD=/aifs4su/yaodong/hantao/align-anything/scripts
|
|
|
|
| 216 |
PYTHONHASHSEED=42
|
| 217 |
PYTHONPATH=/aifs4su/yaodong/hantao/align-anything/scripts
|
| 218 |
-
QT_QPA_FONTDIR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/cv2/qt/fonts
|
| 219 |
-
QT_QPA_PLATFORM_PLUGIN_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/cv2/qt/plugins
|
| 220 |
RANK=0
|
| 221 |
RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib
|
| 222 |
READELF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-readelf
|
| 223 |
SHELL=/bin/bash
|
| 224 |
-
SHLVL=
|
| 225 |
SIZE=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-size
|
| 226 |
SLURM_CONF=/cm/shared/apps/slurm/var/etc/slurm/slurm.conf
|
| 227 |
-
SSH_CLIENT=10.33.4.
|
| 228 |
-
SSH_CONNECTION=10.33.4.
|
| 229 |
-
|
|
|
|
| 230 |
STRINGS=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strings
|
| 231 |
STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip
|
| 232 |
TERM=screen
|
| 233 |
TERM_PROGRAM=tmux
|
| 234 |
TERM_PROGRAM_VERSION=3.2a
|
| 235 |
-
TMUX=/tmp/tmux-1028/default,
|
| 236 |
-
TMUX_PANE=%
|
| 237 |
USER=yangyaodong
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
WANDB_API_KEY=7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33
|
| 239 |
-
WANDB_SERVICE=2-
|
| 240 |
WORLD_SIZE=8
|
| 241 |
XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
|
| 242 |
XDG_RUNTIME_DIR=/run/user/1028
|
| 243 |
XDG_SESSION_CLASS=user
|
| 244 |
-
XDG_SESSION_ID=
|
| 245 |
XDG_SESSION_TYPE=tty
|
| 246 |
ZERO_STAGE=3
|
| 247 |
_=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/deepspeed
|
|
|
|
| 81 |
return 1;
|
| 82 |
fi
|
| 83 |
}
|
| 84 |
+
BROWSER=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/bin/helpers/browser.sh
|
| 85 |
BUILD=x86_64-conda-linux-gnu
|
| 86 |
+
BUNDLED_DEBUGPY_PATH=/home/yangyaodong/.vscode-server/extensions/ms-python.debugpy-2025.0.1-linux-x64/bundled/libs/debugpy
|
| 87 |
CC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
|
| 88 |
CC_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
|
| 89 |
+
CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
|
| 90 |
CMAKE_ARGS=-DCMAKE_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ar -DCMAKE_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib -DCMAKE_LINKER=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld -DCMAKE_STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip -DCMAKE_BUILD_TYPE=Release
|
| 91 |
CMAKE_PREFIX_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama:/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot/usr
|
| 92 |
CMD_WLM_CLUSTER_NAME=slurm
|
| 93 |
+
COLORTERM=truecolor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
CONDA_BUILD_SYSROOT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot
|
| 95 |
CONDA_DEFAULT_ENV=hantao_llama
|
| 96 |
CONDA_EXE=/aifs4su/yaodong/miniconda3/bin/conda
|
| 97 |
CONDA_PREFIX=/aifs4su/yaodong/miniconda3/envs/hantao_llama
|
| 98 |
CONDA_PREFIX_1=/aifs4su/yaodong/miniconda3
|
|
|
|
| 99 |
CONDA_PROMPT_MODIFIER=(hantao_llama)
|
| 100 |
CONDA_PYTHON_EXE=/aifs4su/yaodong/miniconda3/bin/python
|
| 101 |
+
CONDA_SHLVL=2
|
| 102 |
CPATH=/cm/shared/apps/slurm/current/include
|
| 103 |
CPATH_modshare=/cm/shared/apps/slurm/current/include:1
|
| 104 |
CPP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cpp
|
| 105 |
+
CPPFLAGS=-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
|
| 106 |
CROSS_RANK=0
|
| 107 |
CROSS_SIZE=1
|
| 108 |
CUDA_MODULE_LOADING=LAZY
|
| 109 |
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
| 110 |
CXX=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++
|
| 111 |
CXXFILT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++filt
|
| 112 |
+
CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
|
| 113 |
CXX_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++
|
| 114 |
DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1028/bus
|
| 115 |
+
DEBUGPY_ADAPTER_ENDPOINTS=/home/yangyaodong/.vscode-server/extensions/ms-python.debugpy-2025.0.1-linux-x64/.noConfigDebugAdapterEndpoints/endpoint-cf2a8fd1c0b5bb2d.txt
|
| 116 |
+
DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
|
| 117 |
+
DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
|
| 118 |
+
DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
|
| 119 |
ELFEDIT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-elfedit
|
| 120 |
ENABLE_LMOD=0
|
| 121 |
GCC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc
|
| 122 |
GCC_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ar
|
| 123 |
GCC_NM=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-nm
|
| 124 |
GCC_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ranlib
|
| 125 |
+
GIT_ASKPASS=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/extensions/git/dist/askpass.sh
|
| 126 |
GPROF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gprof
|
| 127 |
GSETTINGS_SCHEMA_DIR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/share/glib-2.0/schemas
|
| 128 |
GSETTINGS_SCHEMA_DIR_CONDA_BACKUP=
|
|
|
|
| 136 |
KMP_INIT_AT_FORK=FALSE
|
| 137 |
LANG=C.UTF-8
|
| 138 |
LD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld
|
| 139 |
+
LDFLAGS=-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,-rpath,/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib -Wl,-rpath-link,/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
|
| 140 |
LD_GOLD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld.gold
|
| 141 |
LD_LIBRARY_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/cv2/../../lib64:/usr/mpi/gcc/openmpi-4.1.7a1/lib:/cm/shared/apps/slurm/current/lib64/slurm:/cm/shared/apps/slurm/current/lib64
|
| 142 |
LD_LIBRARY_PATH_modshare=/cm/shared/apps/slurm/current/lib64:1:/usr/mpi/gcc/openmpi-4.1.7a1/lib:1:/cm/shared/apps/slurm/current/lib64/slurm:1
|
|
|
|
| 156 |
MANPATH=/usr/mpi/gcc/openmpi-4.1.7a1/share/man:/cm/shared/apps/slurm/current/man:/cm/local/apps/environment-modules/4.5.3/share/man:/usr/local/man:/usr/local/share/man:/usr/share/man:/cm/local/apps/environment-modules/current/share/man:/cm/local/apps/environment-modules/current/share/man
|
| 157 |
MANPATH_modshare=/usr/local/share/man:1:/usr/mpi/gcc/openmpi-4.1.7a1/share/man:1:/cm/local/apps/environment-modules/current/share/man:1:/cm/local/apps/environment-modules/4.5.3/share/man:1:/usr/local/man:1:/usr/share/man:1:/cm/shared/apps/slurm/current/man:1
|
| 158 |
MASTER_ADDR=127.0.0.1
|
| 159 |
+
MASTER_PORT=17196
|
| 160 |
MIG_PARTED_CHECKPOINT_FILE=/var/lib/nvidia-mig-manager/checkpoint.json
|
| 161 |
MIG_PARTED_CONFIG_FILE=/etc/nvidia-mig-manager/config.yaml
|
| 162 |
MIG_PARTED_HOOKS_FILE=/etc/nvidia-mig-manager/hooks.yaml
|
|
|
|
| 173 |
NVITOP_MONITOR_MODE=colorful
|
| 174 |
OBJCOPY=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objcopy
|
| 175 |
OBJDUMP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objdump
|
| 176 |
+
OLDPWD=/home/yangyaodong
|
| 177 |
+
PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin:/usr/lpp/mmfs/bin:/usr/local/cuda/bin:/opt/bin:/usr/lpp/mmfs/bin:/cm/shared/apps/slurm/current/sbin:/cm/shared/apps/slurm/current/bin:/usr/local/cuda/bin:/opt/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/sbin:/usr/sbin:/cm/local/apps/environment-modules/4.5.3/bin
|
| 178 |
+
PATH_modshare=/usr/mpi/gcc/openmpi-4.1.7a1/bin:1:/opt/bin/:1:/usr/bin:1:/usr/local/bin:1:/cm/shared/apps/slurm/current/bin:1:/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/bin/remote-cli:1:/cm/shared/apps/slurm/current/sbin:1:/bin:1:/snap/bin:1:/sbin:1:/home/yangyaodong/.vscode-server/data/User/globalStorage/github.copilot-chat/debugCommand:1:/home/yangyaodong/.vscode-server/extensions/ms-python.debugpy-2025.0.1-linux-x64/bundled/scripts/noConfigScripts:1:/usr/sbin:1:/usr/games:1:/cm/local/apps/environment-modules/4.5.3/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/usr/local/cuda/bin:1:/usr/local/games:1
|
| 179 |
PWD=/aifs4su/yaodong/hantao/align-anything/scripts
|
| 180 |
+
PYDEVD_DISABLE_FILE_VALIDATION=1
|
| 181 |
PYTHONHASHSEED=42
|
| 182 |
PYTHONPATH=/aifs4su/yaodong/hantao/align-anything/scripts
|
|
|
|
|
|
|
| 183 |
RANK=0
|
| 184 |
RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib
|
| 185 |
READELF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-readelf
|
| 186 |
SHELL=/bin/bash
|
| 187 |
+
SHLVL=4
|
| 188 |
SIZE=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-size
|
| 189 |
SLURM_CONF=/cm/shared/apps/slurm/var/etc/slurm/slurm.conf
|
| 190 |
+
SSH_CLIENT=10.33.4.51 46666 22
|
| 191 |
+
SSH_CONNECTION=10.33.4.230 40638 10.33.4.213 22
|
| 192 |
+
SSL_CERT_DIR=/usr/lib/ssl/certs
|
| 193 |
+
SSL_CERT_FILE=/usr/lib/ssl/certs/ca-certificates.crt
|
| 194 |
STRINGS=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strings
|
| 195 |
STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip
|
| 196 |
TERM=screen
|
| 197 |
TERM_PROGRAM=tmux
|
| 198 |
TERM_PROGRAM_VERSION=3.2a
|
| 199 |
+
TMUX=/tmp/tmux-1028/default,2296743,10
|
| 200 |
+
TMUX_PANE=%25
|
| 201 |
USER=yangyaodong
|
| 202 |
+
VSCODE_GIT_ASKPASS_EXTRA_ARGS=
|
| 203 |
+
VSCODE_GIT_ASKPASS_MAIN=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/extensions/git/dist/askpass-main.js
|
| 204 |
+
VSCODE_GIT_ASKPASS_NODE=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/node
|
| 205 |
+
VSCODE_GIT_IPC_HANDLE=/run/user/1028/vscode-git-bbbbf321f6.sock
|
| 206 |
+
VSCODE_IPC_HOOK_CLI=/run/user/1028/vscode-ipc-e2edf668-dca9-4331-a6ac-7d4507f653ce.sock
|
| 207 |
WANDB_API_KEY=7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33
|
| 208 |
+
WANDB_SERVICE=2-516629-tcp-localhost-38869
|
| 209 |
WORLD_SIZE=8
|
| 210 |
XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
|
| 211 |
XDG_RUNTIME_DIR=/run/user/1028
|
| 212 |
XDG_SESSION_CLASS=user
|
| 213 |
+
XDG_SESSION_ID=43255
|
| 214 |
XDG_SESSION_TYPE=tty
|
| 215 |
ZERO_STAGE=3
|
| 216 |
_=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/deepspeed
|
script.sh
CHANGED
|
@@ -15,35 +15,37 @@
|
|
| 15 |
# limitations under the License.
|
| 16 |
# ==============================================================================
|
| 17 |
|
| 18 |
-
DATASETS_NAME=("top1-20" "top1-40"
|
| 19 |
|
| 20 |
-
MODEL_NAME_OR_PATH="/aifs4su/yaodong/hantao/models/llava-v1.6-
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
| 25 |
TRAIN_NAME="text-image-to-text" # dataset name
|
| 26 |
TRAIN_SPLIT="train" # split the dataset
|
| 27 |
|
| 28 |
-
OUTPUT_DIR="../outputs/
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
done
|
|
|
|
| 15 |
# limitations under the License.
|
| 16 |
# ==============================================================================
|
| 17 |
|
| 18 |
+
DATASETS_NAME=("top1-20" "top1-40")
|
| 19 |
|
| 20 |
+
MODEL_NAME_OR_PATH="/aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf" # model path
|
| 21 |
|
| 22 |
+
# HOSTFILE="/aifs4su/yaodong/hantao/align-anything/scripts/.hostfile"
|
| 23 |
+
|
| 24 |
+
for DATASET_NAME in ${DATASETS_NAME[@]}; do
|
| 25 |
+
TRAIN_DATASETS="/aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/${DATASET_NAME}" # dataset path
|
| 26 |
+
TRAIN_TEMPLATE="AA_TI2T_LLAVA" # dataset template
|
| 27 |
TRAIN_NAME="text-image-to-text" # dataset name
|
| 28 |
TRAIN_SPLIT="train" # split the dataset
|
| 29 |
|
| 30 |
+
OUTPUT_DIR="../outputs/llava_1.6_vicuna_7B_cosi/${DATASET_NAME}" # output dir
|
| 31 |
+
|
| 32 |
+
# For wandb online logging
|
| 33 |
+
export WANDB_API_KEY="7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33"
|
| 34 |
+
|
| 35 |
+
# Source the setup script
|
| 36 |
+
source ./setup.sh
|
| 37 |
+
|
| 38 |
+
# Execute deepspeed command
|
| 39 |
+
deepspeed \
|
| 40 |
+
--master_port ${MASTER_PORT} \
|
| 41 |
+
--module align_anything.trainers.text_image_to_text.dpo \
|
| 42 |
+
--model_name_or_path ${MODEL_NAME_OR_PATH} \
|
| 43 |
+
--train_datasets ${TRAIN_DATASETS} \
|
| 44 |
+
--train_template ${TRAIN_TEMPLATE} \
|
| 45 |
+
--train_split ${TRAIN_SPLIT} \
|
| 46 |
+
--train_name ${TRAIN_NAME} \
|
| 47 |
+
--output_dir ${OUTPUT_DIR} \
|
| 48 |
+
--save_total_limit 3 \
|
| 49 |
+
--train_batch_size 8 \
|
| 50 |
+
--epochs 3
|
| 51 |
done
|
slice_1946/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image>": 32000
|
| 3 |
+
}
|
slice_1946/chat_template.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"
|
| 3 |
+
}
|
slice_1946/config.json
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_attn_implementation_autoset": true,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"LlavaNextForConditionalGeneration"
|
| 5 |
+
],
|
| 6 |
+
"bos_token_id": 1,
|
| 7 |
+
"eos_token_id": 2,
|
| 8 |
+
"ignore_index": -100,
|
| 9 |
+
"image_grid_pinpoints": [
|
| 10 |
+
[
|
| 11 |
+
336,
|
| 12 |
+
672
|
| 13 |
+
],
|
| 14 |
+
[
|
| 15 |
+
672,
|
| 16 |
+
336
|
| 17 |
+
],
|
| 18 |
+
[
|
| 19 |
+
672,
|
| 20 |
+
672
|
| 21 |
+
],
|
| 22 |
+
[
|
| 23 |
+
1008,
|
| 24 |
+
336
|
| 25 |
+
],
|
| 26 |
+
[
|
| 27 |
+
336,
|
| 28 |
+
1008
|
| 29 |
+
]
|
| 30 |
+
],
|
| 31 |
+
"image_seq_length": 576,
|
| 32 |
+
"image_token_index": 32000,
|
| 33 |
+
"model_type": "llava_next",
|
| 34 |
+
"multimodal_projector_bias": true,
|
| 35 |
+
"pad_token_id": 0,
|
| 36 |
+
"projector_hidden_act": "gelu",
|
| 37 |
+
"text_config": {
|
| 38 |
+
"_name_or_path": "lmsys/vicuna-7b-v1.5",
|
| 39 |
+
"architectures": [
|
| 40 |
+
"LlamaForCausalLM"
|
| 41 |
+
],
|
| 42 |
+
"attention_bias": false,
|
| 43 |
+
"attention_dropout": 0.0,
|
| 44 |
+
"head_dim": 128,
|
| 45 |
+
"hidden_act": "silu",
|
| 46 |
+
"hidden_size": 4096,
|
| 47 |
+
"initializer_range": 0.02,
|
| 48 |
+
"intermediate_size": 11008,
|
| 49 |
+
"max_position_embeddings": 4096,
|
| 50 |
+
"mlp_bias": false,
|
| 51 |
+
"model_type": "llama",
|
| 52 |
+
"num_attention_heads": 32,
|
| 53 |
+
"num_hidden_layers": 32,
|
| 54 |
+
"num_key_value_heads": 32,
|
| 55 |
+
"pad_token_id": 0,
|
| 56 |
+
"pretraining_tp": 1,
|
| 57 |
+
"rms_norm_eps": 1e-05,
|
| 58 |
+
"rope_scaling": null,
|
| 59 |
+
"rope_theta": 10000.0,
|
| 60 |
+
"torch_dtype": "bfloat16",
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 32064
|
| 63 |
+
},
|
| 64 |
+
"tie_word_embeddings": false,
|
| 65 |
+
"torch_dtype": "bfloat16",
|
| 66 |
+
"transformers_version": "4.50.0",
|
| 67 |
+
"use_image_newline_parameter": true,
|
| 68 |
+
"vision_config": {
|
| 69 |
+
"attention_dropout": 0.0,
|
| 70 |
+
"hidden_act": "quick_gelu",
|
| 71 |
+
"hidden_size": 1024,
|
| 72 |
+
"image_size": 336,
|
| 73 |
+
"initializer_factor": 1.0,
|
| 74 |
+
"initializer_range": 0.02,
|
| 75 |
+
"intermediate_size": 4096,
|
| 76 |
+
"layer_norm_eps": 1e-05,
|
| 77 |
+
"model_type": "clip_vision_model",
|
| 78 |
+
"num_attention_heads": 16,
|
| 79 |
+
"num_channels": 3,
|
| 80 |
+
"num_hidden_layers": 24,
|
| 81 |
+
"patch_size": 14,
|
| 82 |
+
"projection_dim": 768,
|
| 83 |
+
"torch_dtype": "bfloat16",
|
| 84 |
+
"vocab_size": 32000
|
| 85 |
+
},
|
| 86 |
+
"vision_feature_layer": -2,
|
| 87 |
+
"vision_feature_select_strategy": "default",
|
| 88 |
+
"vocab_size": 32064
|
| 89 |
+
}
|
slice_1946/preprocessor_config.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aspect_ratio_setting": "anyres",
|
| 3 |
+
"crop_size": {
|
| 4 |
+
"height": 336,
|
| 5 |
+
"width": 336
|
| 6 |
+
},
|
| 7 |
+
"do_center_crop": true,
|
| 8 |
+
"do_convert_rgb": true,
|
| 9 |
+
"do_normalize": true,
|
| 10 |
+
"do_pad": true,
|
| 11 |
+
"do_rescale": true,
|
| 12 |
+
"do_resize": true,
|
| 13 |
+
"image_grid_pinpoints": [
|
| 14 |
+
[
|
| 15 |
+
336,
|
| 16 |
+
672
|
| 17 |
+
],
|
| 18 |
+
[
|
| 19 |
+
672,
|
| 20 |
+
336
|
| 21 |
+
],
|
| 22 |
+
[
|
| 23 |
+
672,
|
| 24 |
+
672
|
| 25 |
+
],
|
| 26 |
+
[
|
| 27 |
+
1008,
|
| 28 |
+
336
|
| 29 |
+
],
|
| 30 |
+
[
|
| 31 |
+
336,
|
| 32 |
+
1008
|
| 33 |
+
]
|
| 34 |
+
],
|
| 35 |
+
"image_mean": [
|
| 36 |
+
0.48145466,
|
| 37 |
+
0.4578275,
|
| 38 |
+
0.40821073
|
| 39 |
+
],
|
| 40 |
+
"image_processor_type": "LlavaNextImageProcessor",
|
| 41 |
+
"image_std": [
|
| 42 |
+
0.26862954,
|
| 43 |
+
0.26130258,
|
| 44 |
+
0.27577711
|
| 45 |
+
],
|
| 46 |
+
"processor_class": "LlavaNextProcessor",
|
| 47 |
+
"resample": 3,
|
| 48 |
+
"rescale_factor": 0.00392156862745098,
|
| 49 |
+
"size": {
|
| 50 |
+
"shortest_edge": 336
|
| 51 |
+
}
|
| 52 |
+
}
|
slice_1946/processor_config.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_token": "<image>",
|
| 3 |
+
"num_additional_image_tokens": 1,
|
| 4 |
+
"patch_size": 14,
|
| 5 |
+
"processor_class": "LlavaNextProcessor",
|
| 6 |
+
"vision_feature_select_strategy": "default"
|
| 7 |
+
}
|
slice_1946/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26125e26b5c79b364ed2575b867704cb01b6881c2952992f7796065d357fbb00
|
| 3 |
+
size 14127100866
|
slice_1946/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"image_token": "<image>",
|
| 17 |
+
"pad_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"unk_token": {
|
| 25 |
+
"content": "<unk>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
slice_1946/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
slice_1946/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
slice_1946/tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": true,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
},
|
| 30 |
+
"32000": {
|
| 31 |
+
"content": "<image>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false,
|
| 36 |
+
"special": true
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"bos_token": "<s>",
|
| 40 |
+
"clean_up_tokenization_spaces": false,
|
| 41 |
+
"eos_token": "</s>",
|
| 42 |
+
"extra_special_tokens": {
|
| 43 |
+
"image_token": "<image>"
|
| 44 |
+
},
|
| 45 |
+
"image_token": "<image>",
|
| 46 |
+
"legacy": false,
|
| 47 |
+
"model_max_length": 4096,
|
| 48 |
+
"pad_token": "<unk>",
|
| 49 |
+
"padding_side": "left",
|
| 50 |
+
"processor_class": "LlavaNextProcessor",
|
| 51 |
+
"sp_model_kwargs": {},
|
| 52 |
+
"spaces_between_special_tokens": false,
|
| 53 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 54 |
+
"unk_token": "<unk>",
|
| 55 |
+
"use_default_system_prompt": false
|
| 56 |
+
}
|
slice_2919/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image>": 32000
|
| 3 |
+
}
|
slice_2919/chat_template.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"
|
| 3 |
+
}
|
slice_2919/config.json
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_attn_implementation_autoset": true,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"LlavaNextForConditionalGeneration"
|
| 5 |
+
],
|
| 6 |
+
"bos_token_id": 1,
|
| 7 |
+
"eos_token_id": 2,
|
| 8 |
+
"ignore_index": -100,
|
| 9 |
+
"image_grid_pinpoints": [
|
| 10 |
+
[
|
| 11 |
+
336,
|
| 12 |
+
672
|
| 13 |
+
],
|
| 14 |
+
[
|
| 15 |
+
672,
|
| 16 |
+
336
|
| 17 |
+
],
|
| 18 |
+
[
|
| 19 |
+
672,
|
| 20 |
+
672
|
| 21 |
+
],
|
| 22 |
+
[
|
| 23 |
+
1008,
|
| 24 |
+
336
|
| 25 |
+
],
|
| 26 |
+
[
|
| 27 |
+
336,
|
| 28 |
+
1008
|
| 29 |
+
]
|
| 30 |
+
],
|
| 31 |
+
"image_seq_length": 576,
|
| 32 |
+
"image_token_index": 32000,
|
| 33 |
+
"model_type": "llava_next",
|
| 34 |
+
"multimodal_projector_bias": true,
|
| 35 |
+
"pad_token_id": 0,
|
| 36 |
+
"projector_hidden_act": "gelu",
|
| 37 |
+
"text_config": {
|
| 38 |
+
"_name_or_path": "lmsys/vicuna-7b-v1.5",
|
| 39 |
+
"architectures": [
|
| 40 |
+
"LlamaForCausalLM"
|
| 41 |
+
],
|
| 42 |
+
"attention_bias": false,
|
| 43 |
+
"attention_dropout": 0.0,
|
| 44 |
+
"head_dim": 128,
|
| 45 |
+
"hidden_act": "silu",
|
| 46 |
+
"hidden_size": 4096,
|
| 47 |
+
"initializer_range": 0.02,
|
| 48 |
+
"intermediate_size": 11008,
|
| 49 |
+
"max_position_embeddings": 4096,
|
| 50 |
+
"mlp_bias": false,
|
| 51 |
+
"model_type": "llama",
|
| 52 |
+
"num_attention_heads": 32,
|
| 53 |
+
"num_hidden_layers": 32,
|
| 54 |
+
"num_key_value_heads": 32,
|
| 55 |
+
"pad_token_id": 0,
|
| 56 |
+
"pretraining_tp": 1,
|
| 57 |
+
"rms_norm_eps": 1e-05,
|
| 58 |
+
"rope_scaling": null,
|
| 59 |
+
"rope_theta": 10000.0,
|
| 60 |
+
"torch_dtype": "bfloat16",
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 32064
|
| 63 |
+
},
|
| 64 |
+
"tie_word_embeddings": false,
|
| 65 |
+
"torch_dtype": "bfloat16",
|
| 66 |
+
"transformers_version": "4.50.0",
|
| 67 |
+
"use_image_newline_parameter": true,
|
| 68 |
+
"vision_config": {
|
| 69 |
+
"attention_dropout": 0.0,
|
| 70 |
+
"hidden_act": "quick_gelu",
|
| 71 |
+
"hidden_size": 1024,
|
| 72 |
+
"image_size": 336,
|
| 73 |
+
"initializer_factor": 1.0,
|
| 74 |
+
"initializer_range": 0.02,
|
| 75 |
+
"intermediate_size": 4096,
|
| 76 |
+
"layer_norm_eps": 1e-05,
|
| 77 |
+
"model_type": "clip_vision_model",
|
| 78 |
+
"num_attention_heads": 16,
|
| 79 |
+
"num_channels": 3,
|
| 80 |
+
"num_hidden_layers": 24,
|
| 81 |
+
"patch_size": 14,
|
| 82 |
+
"projection_dim": 768,
|
| 83 |
+
"torch_dtype": "bfloat16",
|
| 84 |
+
"vocab_size": 32000
|
| 85 |
+
},
|
| 86 |
+
"vision_feature_layer": -2,
|
| 87 |
+
"vision_feature_select_strategy": "default",
|
| 88 |
+
"vocab_size": 32064
|
| 89 |
+
}
|
slice_2919/preprocessor_config.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aspect_ratio_setting": "anyres",
|
| 3 |
+
"crop_size": {
|
| 4 |
+
"height": 336,
|
| 5 |
+
"width": 336
|
| 6 |
+
},
|
| 7 |
+
"do_center_crop": true,
|
| 8 |
+
"do_convert_rgb": true,
|
| 9 |
+
"do_normalize": true,
|
| 10 |
+
"do_pad": true,
|
| 11 |
+
"do_rescale": true,
|
| 12 |
+
"do_resize": true,
|
| 13 |
+
"image_grid_pinpoints": [
|
| 14 |
+
[
|
| 15 |
+
336,
|
| 16 |
+
672
|
| 17 |
+
],
|
| 18 |
+
[
|
| 19 |
+
672,
|
| 20 |
+
336
|
| 21 |
+
],
|
| 22 |
+
[
|
| 23 |
+
672,
|
| 24 |
+
672
|
| 25 |
+
],
|
| 26 |
+
[
|
| 27 |
+
1008,
|
| 28 |
+
336
|
| 29 |
+
],
|
| 30 |
+
[
|
| 31 |
+
336,
|
| 32 |
+
1008
|
| 33 |
+
]
|
| 34 |
+
],
|
| 35 |
+
"image_mean": [
|
| 36 |
+
0.48145466,
|
| 37 |
+
0.4578275,
|
| 38 |
+
0.40821073
|
| 39 |
+
],
|
| 40 |
+
"image_processor_type": "LlavaNextImageProcessor",
|
| 41 |
+
"image_std": [
|
| 42 |
+
0.26862954,
|
| 43 |
+
0.26130258,
|
| 44 |
+
0.27577711
|
| 45 |
+
],
|
| 46 |
+
"processor_class": "LlavaNextProcessor",
|
| 47 |
+
"resample": 3,
|
| 48 |
+
"rescale_factor": 0.00392156862745098,
|
| 49 |
+
"size": {
|
| 50 |
+
"shortest_edge": 336
|
| 51 |
+
}
|
| 52 |
+
}
|
slice_2919/processor_config.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_token": "<image>",
|
| 3 |
+
"num_additional_image_tokens": 1,
|
| 4 |
+
"patch_size": 14,
|
| 5 |
+
"processor_class": "LlavaNextProcessor",
|
| 6 |
+
"vision_feature_select_strategy": "default"
|
| 7 |
+
}
|
slice_2919/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f2fe0e44c0cb29572163848f7e15956178c685c2ceeb9930e9e5b34a45fd941
|
| 3 |
+
size 14127100866
|
slice_2919/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"image_token": "<image>",
|
| 17 |
+
"pad_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"unk_token": {
|
| 25 |
+
"content": "<unk>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
slice_2919/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
slice_2919/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
slice_2919/tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": true,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
},
|
| 30 |
+
"32000": {
|
| 31 |
+
"content": "<image>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false,
|
| 36 |
+
"special": true
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"bos_token": "<s>",
|
| 40 |
+
"clean_up_tokenization_spaces": false,
|
| 41 |
+
"eos_token": "</s>",
|
| 42 |
+
"extra_special_tokens": {
|
| 43 |
+
"image_token": "<image>"
|
| 44 |
+
},
|
| 45 |
+
"image_token": "<image>",
|
| 46 |
+
"legacy": false,
|
| 47 |
+
"model_max_length": 4096,
|
| 48 |
+
"pad_token": "<unk>",
|
| 49 |
+
"padding_side": "left",
|
| 50 |
+
"processor_class": "LlavaNextProcessor",
|
| 51 |
+
"sp_model_kwargs": {},
|
| 52 |
+
"spaces_between_special_tokens": false,
|
| 53 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 54 |
+
"unk_token": "<unk>",
|
| 55 |
+
"use_default_system_prompt": false
|
| 56 |
+
}
|
slice_973/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image>": 32000
|
| 3 |
+
}
|
slice_973/chat_template.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"
|
| 3 |
+
}
|
slice_973/config.json
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_attn_implementation_autoset": true,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"LlavaNextForConditionalGeneration"
|
| 5 |
+
],
|
| 6 |
+
"bos_token_id": 1,
|
| 7 |
+
"eos_token_id": 2,
|
| 8 |
+
"ignore_index": -100,
|
| 9 |
+
"image_grid_pinpoints": [
|
| 10 |
+
[
|
| 11 |
+
336,
|
| 12 |
+
672
|
| 13 |
+
],
|
| 14 |
+
[
|
| 15 |
+
672,
|
| 16 |
+
336
|
| 17 |
+
],
|
| 18 |
+
[
|
| 19 |
+
672,
|
| 20 |
+
672
|
| 21 |
+
],
|
| 22 |
+
[
|
| 23 |
+
1008,
|
| 24 |
+
336
|
| 25 |
+
],
|
| 26 |
+
[
|
| 27 |
+
336,
|
| 28 |
+
1008
|
| 29 |
+
]
|
| 30 |
+
],
|
| 31 |
+
"image_seq_length": 576,
|
| 32 |
+
"image_token_index": 32000,
|
| 33 |
+
"model_type": "llava_next",
|
| 34 |
+
"multimodal_projector_bias": true,
|
| 35 |
+
"pad_token_id": 0,
|
| 36 |
+
"projector_hidden_act": "gelu",
|
| 37 |
+
"text_config": {
|
| 38 |
+
"_name_or_path": "lmsys/vicuna-7b-v1.5",
|
| 39 |
+
"architectures": [
|
| 40 |
+
"LlamaForCausalLM"
|
| 41 |
+
],
|
| 42 |
+
"attention_bias": false,
|
| 43 |
+
"attention_dropout": 0.0,
|
| 44 |
+
"head_dim": 128,
|
| 45 |
+
"hidden_act": "silu",
|
| 46 |
+
"hidden_size": 4096,
|
| 47 |
+
"initializer_range": 0.02,
|
| 48 |
+
"intermediate_size": 11008,
|
| 49 |
+
"max_position_embeddings": 4096,
|
| 50 |
+
"mlp_bias": false,
|
| 51 |
+
"model_type": "llama",
|
| 52 |
+
"num_attention_heads": 32,
|
| 53 |
+
"num_hidden_layers": 32,
|
| 54 |
+
"num_key_value_heads": 32,
|
| 55 |
+
"pad_token_id": 0,
|
| 56 |
+
"pretraining_tp": 1,
|
| 57 |
+
"rms_norm_eps": 1e-05,
|
| 58 |
+
"rope_scaling": null,
|
| 59 |
+
"rope_theta": 10000.0,
|
| 60 |
+
"torch_dtype": "bfloat16",
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 32064
|
| 63 |
+
},
|
| 64 |
+
"tie_word_embeddings": false,
|
| 65 |
+
"torch_dtype": "bfloat16",
|
| 66 |
+
"transformers_version": "4.50.0",
|
| 67 |
+
"use_image_newline_parameter": true,
|
| 68 |
+
"vision_config": {
|
| 69 |
+
"attention_dropout": 0.0,
|
| 70 |
+
"hidden_act": "quick_gelu",
|
| 71 |
+
"hidden_size": 1024,
|
| 72 |
+
"image_size": 336,
|
| 73 |
+
"initializer_factor": 1.0,
|
| 74 |
+
"initializer_range": 0.02,
|
| 75 |
+
"intermediate_size": 4096,
|
| 76 |
+
"layer_norm_eps": 1e-05,
|
| 77 |
+
"model_type": "clip_vision_model",
|
| 78 |
+
"num_attention_heads": 16,
|
| 79 |
+
"num_channels": 3,
|
| 80 |
+
"num_hidden_layers": 24,
|
| 81 |
+
"patch_size": 14,
|
| 82 |
+
"projection_dim": 768,
|
| 83 |
+
"torch_dtype": "bfloat16",
|
| 84 |
+
"vocab_size": 32000
|
| 85 |
+
},
|
| 86 |
+
"vision_feature_layer": -2,
|
| 87 |
+
"vision_feature_select_strategy": "default",
|
| 88 |
+
"vocab_size": 32064
|
| 89 |
+
}
|
slice_973/preprocessor_config.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aspect_ratio_setting": "anyres",
|
| 3 |
+
"crop_size": {
|
| 4 |
+
"height": 336,
|
| 5 |
+
"width": 336
|
| 6 |
+
},
|
| 7 |
+
"do_center_crop": true,
|
| 8 |
+
"do_convert_rgb": true,
|
| 9 |
+
"do_normalize": true,
|
| 10 |
+
"do_pad": true,
|
| 11 |
+
"do_rescale": true,
|
| 12 |
+
"do_resize": true,
|
| 13 |
+
"image_grid_pinpoints": [
|
| 14 |
+
[
|
| 15 |
+
336,
|
| 16 |
+
672
|
| 17 |
+
],
|
| 18 |
+
[
|
| 19 |
+
672,
|
| 20 |
+
336
|
| 21 |
+
],
|
| 22 |
+
[
|
| 23 |
+
672,
|
| 24 |
+
672
|
| 25 |
+
],
|
| 26 |
+
[
|
| 27 |
+
1008,
|
| 28 |
+
336
|
| 29 |
+
],
|
| 30 |
+
[
|
| 31 |
+
336,
|
| 32 |
+
1008
|
| 33 |
+
]
|
| 34 |
+
],
|
| 35 |
+
"image_mean": [
|
| 36 |
+
0.48145466,
|
| 37 |
+
0.4578275,
|
| 38 |
+
0.40821073
|
| 39 |
+
],
|
| 40 |
+
"image_processor_type": "LlavaNextImageProcessor",
|
| 41 |
+
"image_std": [
|
| 42 |
+
0.26862954,
|
| 43 |
+
0.26130258,
|
| 44 |
+
0.27577711
|
| 45 |
+
],
|
| 46 |
+
"processor_class": "LlavaNextProcessor",
|
| 47 |
+
"resample": 3,
|
| 48 |
+
"rescale_factor": 0.00392156862745098,
|
| 49 |
+
"size": {
|
| 50 |
+
"shortest_edge": 336
|
| 51 |
+
}
|
| 52 |
+
}
|
slice_973/processor_config.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_token": "<image>",
|
| 3 |
+
"num_additional_image_tokens": 1,
|
| 4 |
+
"patch_size": 14,
|
| 5 |
+
"processor_class": "LlavaNextProcessor",
|
| 6 |
+
"vision_feature_select_strategy": "default"
|
| 7 |
+
}
|
slice_973/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53c94f98c6773a4bd6036fd0a45deace471472bd745b2d9c4981b365fcbb5ef5
|
| 3 |
+
size 14127100866
|
slice_973/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"image_token": "<image>",
|
| 17 |
+
"pad_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"unk_token": {
|
| 25 |
+
"content": "<unk>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
slice_973/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
slice_973/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
slice_973/tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": true,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
},
|
| 30 |
+
"32000": {
|
| 31 |
+
"content": "<image>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false,
|
| 36 |
+
"special": true
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"bos_token": "<s>",
|
| 40 |
+
"clean_up_tokenization_spaces": false,
|
| 41 |
+
"eos_token": "</s>",
|
| 42 |
+
"extra_special_tokens": {
|
| 43 |
+
"image_token": "<image>"
|
| 44 |
+
},
|
| 45 |
+
"image_token": "<image>",
|
| 46 |
+
"legacy": false,
|
| 47 |
+
"model_max_length": 4096,
|
| 48 |
+
"pad_token": "<unk>",
|
| 49 |
+
"padding_side": "left",
|
| 50 |
+
"processor_class": "LlavaNextProcessor",
|
| 51 |
+
"sp_model_kwargs": {},
|
| 52 |
+
"spaces_between_special_tokens": false,
|
| 53 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 54 |
+
"unk_token": "<unk>",
|
| 55 |
+
"use_default_system_prompt": false
|
| 56 |
+
}
|
slice_end/added_tokens.json
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
{
|
| 2 |
-
"<image>": 32000
|
| 3 |
-
"<pad>": 32001
|
| 4 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"<image>": 32000
|
|
|
|
| 3 |
}
|
slice_end/config.json
CHANGED
|
@@ -32,27 +32,31 @@
|
|
| 32 |
"image_token_index": 32000,
|
| 33 |
"model_type": "llava_next",
|
| 34 |
"multimodal_projector_bias": true,
|
| 35 |
-
"pad_token_id":
|
| 36 |
"projector_hidden_act": "gelu",
|
| 37 |
"text_config": {
|
| 38 |
-
"_name_or_path": "
|
| 39 |
"architectures": [
|
| 40 |
-
"
|
| 41 |
],
|
|
|
|
| 42 |
"attention_dropout": 0.0,
|
| 43 |
"head_dim": 128,
|
| 44 |
"hidden_act": "silu",
|
| 45 |
"hidden_size": 4096,
|
| 46 |
"initializer_range": 0.02,
|
| 47 |
-
"intermediate_size":
|
| 48 |
-
"max_position_embeddings":
|
| 49 |
-
"
|
|
|
|
| 50 |
"num_attention_heads": 32,
|
| 51 |
"num_hidden_layers": 32,
|
| 52 |
-
"num_key_value_heads":
|
|
|
|
|
|
|
| 53 |
"rms_norm_eps": 1e-05,
|
| 54 |
-
"
|
| 55 |
-
"
|
| 56 |
"torch_dtype": "bfloat16",
|
| 57 |
"use_cache": true,
|
| 58 |
"vocab_size": 32064
|
|
|
|
| 32 |
"image_token_index": 32000,
|
| 33 |
"model_type": "llava_next",
|
| 34 |
"multimodal_projector_bias": true,
|
| 35 |
+
"pad_token_id": 0,
|
| 36 |
"projector_hidden_act": "gelu",
|
| 37 |
"text_config": {
|
| 38 |
+
"_name_or_path": "lmsys/vicuna-7b-v1.5",
|
| 39 |
"architectures": [
|
| 40 |
+
"LlamaForCausalLM"
|
| 41 |
],
|
| 42 |
+
"attention_bias": false,
|
| 43 |
"attention_dropout": 0.0,
|
| 44 |
"head_dim": 128,
|
| 45 |
"hidden_act": "silu",
|
| 46 |
"hidden_size": 4096,
|
| 47 |
"initializer_range": 0.02,
|
| 48 |
+
"intermediate_size": 11008,
|
| 49 |
+
"max_position_embeddings": 4096,
|
| 50 |
+
"mlp_bias": false,
|
| 51 |
+
"model_type": "llama",
|
| 52 |
"num_attention_heads": 32,
|
| 53 |
"num_hidden_layers": 32,
|
| 54 |
+
"num_key_value_heads": 32,
|
| 55 |
+
"pad_token_id": 0,
|
| 56 |
+
"pretraining_tp": 1,
|
| 57 |
"rms_norm_eps": 1e-05,
|
| 58 |
+
"rope_scaling": null,
|
| 59 |
+
"rope_theta": 10000.0,
|
| 60 |
"torch_dtype": "bfloat16",
|
| 61 |
"use_cache": true,
|
| 62 |
"vocab_size": 32064
|
slice_end/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f2fe0e44c0cb29572163848f7e15956178c685c2ceeb9930e9e5b34a45fd941
|
| 3 |
+
size 14127100866
|
slice_end/special_tokens_map.json
CHANGED
|
@@ -15,7 +15,7 @@
|
|
| 15 |
},
|
| 16 |
"image_token": "<image>",
|
| 17 |
"pad_token": {
|
| 18 |
-
"content": "<
|
| 19 |
"lstrip": false,
|
| 20 |
"normalized": false,
|
| 21 |
"rstrip": false,
|
|
|
|
| 15 |
},
|
| 16 |
"image_token": "<image>",
|
| 17 |
"pad_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
"lstrip": false,
|
| 20 |
"normalized": false,
|
| 21 |
"rstrip": false,
|
slice_end/tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
slice_end/tokenizer.model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
slice_end/tokenizer_config.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"add_bos_token": true,
|
| 3 |
"add_eos_token": false,
|
| 4 |
-
"add_prefix_space":
|
| 5 |
"added_tokens_decoder": {
|
| 6 |
"0": {
|
| 7 |
"content": "<unk>",
|
|
@@ -34,32 +34,19 @@
|
|
| 34 |
"rstrip": false,
|
| 35 |
"single_word": false,
|
| 36 |
"special": true
|
| 37 |
-
},
|
| 38 |
-
"32001": {
|
| 39 |
-
"content": "<pad>",
|
| 40 |
-
"lstrip": false,
|
| 41 |
-
"normalized": false,
|
| 42 |
-
"rstrip": false,
|
| 43 |
-
"single_word": false,
|
| 44 |
-
"special": true
|
| 45 |
}
|
| 46 |
},
|
| 47 |
-
"additional_special_tokens": [],
|
| 48 |
"bos_token": "<s>",
|
| 49 |
-
"chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
|
| 50 |
"clean_up_tokenization_spaces": false,
|
| 51 |
"eos_token": "</s>",
|
| 52 |
"extra_special_tokens": {
|
| 53 |
"image_token": "<image>"
|
| 54 |
},
|
| 55 |
"image_token": "<image>",
|
| 56 |
-
"legacy":
|
| 57 |
-
"
|
| 58 |
-
"
|
| 59 |
-
"
|
| 60 |
-
"pad_token": "<pad>",
|
| 61 |
-
"pad_token_type_id": 0,
|
| 62 |
-
"padding_side": "right",
|
| 63 |
"processor_class": "LlavaNextProcessor",
|
| 64 |
"sp_model_kwargs": {},
|
| 65 |
"spaces_between_special_tokens": false,
|
|
|
|
| 1 |
{
|
| 2 |
"add_bos_token": true,
|
| 3 |
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": true,
|
| 5 |
"added_tokens_decoder": {
|
| 6 |
"0": {
|
| 7 |
"content": "<unk>",
|
|
|
|
| 34 |
"rstrip": false,
|
| 35 |
"single_word": false,
|
| 36 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
}
|
| 38 |
},
|
|
|
|
| 39 |
"bos_token": "<s>",
|
|
|
|
| 40 |
"clean_up_tokenization_spaces": false,
|
| 41 |
"eos_token": "</s>",
|
| 42 |
"extra_special_tokens": {
|
| 43 |
"image_token": "<image>"
|
| 44 |
},
|
| 45 |
"image_token": "<image>",
|
| 46 |
+
"legacy": false,
|
| 47 |
+
"model_max_length": 4096,
|
| 48 |
+
"pad_token": "<unk>",
|
| 49 |
+
"padding_side": "left",
|
|
|
|
|
|
|
|
|
|
| 50 |
"processor_class": "LlavaNextProcessor",
|
| 51 |
"sp_model_kwargs": {},
|
| 52 |
"spaces_between_special_tokens": false,
|
wandb/debug-internal.log
CHANGED
|
@@ -1,18 +1,16 @@
|
|
| 1 |
-
{"time":"2025-
|
| 2 |
-
{"time":"2025-
|
| 3 |
-
{"time":"2025-
|
| 4 |
-
{"time":"2025-
|
| 5 |
-
{"time":"2025-
|
| 6 |
-
{"time":"2025-
|
| 7 |
-
{"time":"2025-
|
| 8 |
-
{"time":"2025-
|
| 9 |
-
{"time":"2025-
|
| 10 |
-
{"time":"2025-
|
| 11 |
-
{"time":"2025-
|
| 12 |
-
{"time":"2025-
|
| 13 |
-
{"time":"2025-
|
| 14 |
-
{"time":"2025-
|
| 15 |
-
{"time":"2025-
|
| 16 |
-
{"time":"2025-
|
| 17 |
-
{"time":"2025-03-29T05:50:21.963502619+08:00","level":"INFO","msg":"sender: closed","stream_id":"woy6qs03"}
|
| 18 |
-
{"time":"2025-03-29T05:50:21.963744004+08:00","level":"INFO","msg":"stream: closed","id":"woy6qs03"}
|
|
|
|
| 1 |
+
{"time":"2025-04-01T09:28:24.081265347+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"../outputs/llava_1.6_vicuna_7B_cosi/top1-20/wandb/run-20250401_092824-bd2o51v8/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-04-01T09:28:24.302680251+08:00","level":"INFO","msg":"created new stream","id":"bd2o51v8"}
|
| 3 |
+
{"time":"2025-04-01T09:28:24.302754002+08:00","level":"INFO","msg":"stream: started","id":"bd2o51v8"}
|
| 4 |
+
{"time":"2025-04-01T09:28:24.302817732+08:00","level":"INFO","msg":"handler: started","stream_id":"bd2o51v8"}
|
| 5 |
+
{"time":"2025-04-01T09:28:24.302840797+08:00","level":"INFO","msg":"sender: started","stream_id":"bd2o51v8"}
|
| 6 |
+
{"time":"2025-04-01T09:28:24.302883549+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"bd2o51v8"}
|
| 7 |
+
{"time":"2025-04-01T09:28:24.717791905+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-04-01T11:33:47.258061087+08:00","level":"INFO","msg":"Stopping system monitor"}
|
| 9 |
+
{"time":"2025-04-01T11:33:47.265278587+08:00","level":"INFO","msg":"Stopped system monitor"}
|
| 10 |
+
{"time":"2025-04-01T11:33:48.674037127+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 11 |
+
{"time":"2025-04-01T11:33:48.72438635+08:00","level":"INFO","msg":"stream: closing","id":"bd2o51v8"}
|
| 12 |
+
{"time":"2025-04-01T11:33:48.724423251+08:00","level":"WARN","msg":"sender: received Exit record more than once, ignoring"}
|
| 13 |
+
{"time":"2025-04-01T11:33:48.894912192+08:00","level":"INFO","msg":"handler: closed","stream_id":"bd2o51v8"}
|
| 14 |
+
{"time":"2025-04-01T11:33:48.89495363+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"bd2o51v8"}
|
| 15 |
+
{"time":"2025-04-01T11:33:48.895096718+08:00","level":"INFO","msg":"sender: closed","stream_id":"bd2o51v8"}
|
| 16 |
+
{"time":"2025-04-01T11:33:48.895192408+08:00","level":"INFO","msg":"stream: closed","id":"bd2o51v8"}
|
|
|
|
|
|
wandb/debug.log
CHANGED
|
@@ -1,32 +1,32 @@
|
|
| 1 |
-
2025-
|
| 2 |
-
2025-
|
| 3 |
-
2025-
|
| 4 |
-
2025-
|
| 5 |
-
2025-
|
| 6 |
-
2025-
|
| 7 |
-
2025-
|
| 8 |
-
2025-
|
| 9 |
-
2025-
|
| 10 |
-
config: {'train_cfgs': {'save_checkpoint': False, 'load_checkpoint': False, 'ds_cfgs': 'ds_z3_config.json', 'epochs': 3, 'seed': 42, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps':
|
| 11 |
-
2025-
|
| 12 |
-
2025-
|
| 13 |
-
2025-
|
| 14 |
-
2025-
|
| 15 |
-
2025-
|
| 16 |
-
2025-
|
| 17 |
-
2025-
|
| 18 |
-
2025-
|
| 19 |
-
2025-
|
| 20 |
-
2025-
|
| 21 |
-
2025-
|
| 22 |
-
2025-
|
| 23 |
-
2025-
|
| 24 |
-
2025-
|
| 25 |
-
2025-
|
| 26 |
-
2025-
|
| 27 |
-
2025-
|
| 28 |
-
2025-
|
| 29 |
-
2025-
|
| 30 |
Traceback (most recent call last):
|
| 31 |
File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2352, in _atexit_cleanup
|
| 32 |
self._on_finish()
|
|
@@ -44,4 +44,4 @@ Traceback (most recent call last):
|
|
| 44 |
File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/concurrent/futures/thread.py", line 169, in submit
|
| 45 |
raise RuntimeError('cannot schedule new futures after '
|
| 46 |
RuntimeError: cannot schedule new futures after interpreter shutdown
|
| 47 |
-
2025-
|
|
|
|
| 1 |
+
2025-04-01 09:28:24,067 INFO MainThread:516629 [wandb_setup.py:_flush():67] Current SDK version is 0.19.8
|
| 2 |
+
2025-04-01 09:28:24,068 INFO MainThread:516629 [wandb_setup.py:_flush():67] Configure stats pid to 516629
|
| 3 |
+
2025-04-01 09:28:24,068 INFO MainThread:516629 [wandb_setup.py:_flush():67] Loading settings from /home/yangyaodong/.config/wandb/settings
|
| 4 |
+
2025-04-01 09:28:24,068 INFO MainThread:516629 [wandb_setup.py:_flush():67] Loading settings from /aifs4su/yaodong/hantao/align-anything/scripts/wandb/settings
|
| 5 |
+
2025-04-01 09:28:24,068 INFO MainThread:516629 [wandb_setup.py:_flush():67] Loading settings from environment variables
|
| 6 |
+
2025-04-01 09:28:24,068 INFO MainThread:516629 [wandb_init.py:setup_run_log_directory():647] Logging user logs to ../outputs/llava_1.6_vicuna_7B_cosi/top1-20/wandb/run-20250401_092824-bd2o51v8/logs/debug.log
|
| 7 |
+
2025-04-01 09:28:24,069 INFO MainThread:516629 [wandb_init.py:setup_run_log_directory():648] Logging internal logs to ../outputs/llava_1.6_vicuna_7B_cosi/top1-20/wandb/run-20250401_092824-bd2o51v8/logs/debug-internal.log
|
| 8 |
+
2025-04-01 09:28:24,069 INFO MainThread:516629 [wandb_init.py:init():761] calling init triggers
|
| 9 |
+
2025-04-01 09:28:24,069 INFO MainThread:516629 [wandb_init.py:init():766] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'train_cfgs': {'save_checkpoint': False, 'load_checkpoint': False, 'ds_cfgs': 'ds_z3_config.json', 'epochs': 3, 'seed': 42, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 1, 'gradient_checkpointing': True, 'learning_rate': 1e-06, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.0, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': False, 'freeze_vision_tower': True, 'freeze_language_model': False}, 'data_cfgs': {'train_datasets': '/aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-20', 'train_template': 'AA_TI2T_LLAVA', 'train_size': {}, 'train_split': 'train', 'train_name': 'text-image-to-text', 'train_data_files': {}, 'train_optional_args': [], 'eval_datasets': {}, 'eval_template': {}, 'eval_size': {}, 'eval_split': {}, 'eval_subset': {}, 'eval_data_files': {}, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '../outputs/llava_1.6_vicuna_7B_cosi/top1-20', 'cache_dir': {}, 'save_total_limit': 3}, 'model_cfgs': {'model_name_or_path': '/aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': {}, '_wandb': {}}
|
| 11 |
+
2025-04-01 09:28:24,069 INFO MainThread:516629 [wandb_init.py:init():784] starting backend
|
| 12 |
+
2025-04-01 09:28:24,069 INFO MainThread:516629 [wandb_init.py:init():788] sending inform_init request
|
| 13 |
+
2025-04-01 09:28:24,077 INFO MainThread:516629 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-04-01 09:28:24,077 INFO MainThread:516629 [wandb_init.py:init():798] backend started and connected
|
| 15 |
+
2025-04-01 09:28:24,079 INFO MainThread:516629 [wandb_init.py:init():891] updated telemetry
|
| 16 |
+
2025-04-01 09:28:24,101 INFO MainThread:516629 [wandb_init.py:init():915] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-04-01 09:28:24,710 INFO MainThread:516629 [wandb_init.py:init():990] starting run threads in backend
|
| 18 |
+
2025-04-01 09:28:25,153 INFO MainThread:516629 [wandb_run.py:_console_start():2375] atexit reg
|
| 19 |
+
2025-04-01 09:28:25,153 INFO MainThread:516629 [wandb_run.py:_redirect():2227] redirect: wrap_raw
|
| 20 |
+
2025-04-01 09:28:25,153 INFO MainThread:516629 [wandb_run.py:_redirect():2292] Wrapping output streams.
|
| 21 |
+
2025-04-01 09:28:25,153 INFO MainThread:516629 [wandb_run.py:_redirect():2315] Redirects installed.
|
| 22 |
+
2025-04-01 09:28:25,159 INFO MainThread:516629 [wandb_init.py:init():1032] run started, returning control to user process
|
| 23 |
+
2025-04-01 11:33:47,217 INFO MainThread:516629 [wandb_run.py:_finish():2112] finishing run htlou/align-anything/bd2o51v8
|
| 24 |
+
2025-04-01 11:33:47,219 INFO MainThread:516629 [wandb_run.py:_atexit_cleanup():2340] got exitcode: 0
|
| 25 |
+
2025-04-01 11:33:47,220 INFO MainThread:516629 [wandb_run.py:_restore():2322] restore
|
| 26 |
+
2025-04-01 11:33:47,220 INFO MainThread:516629 [wandb_run.py:_restore():2328] restore done
|
| 27 |
+
2025-04-01 11:33:48,221 INFO MainThread:516629 [wandb_run.py:_restore():2322] restore
|
| 28 |
+
2025-04-01 11:33:48,221 INFO MainThread:516629 [wandb_run.py:_restore():2328] restore done
|
| 29 |
+
2025-04-01 11:33:48,221 ERROR MainThread:516629 [wandb_run.py:_atexit_cleanup():2361] Problem finishing run
|
| 30 |
Traceback (most recent call last):
|
| 31 |
File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2352, in _atexit_cleanup
|
| 32 |
self._on_finish()
|
|
|
|
| 44 |
File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/concurrent/futures/thread.py", line 169, in submit
|
| 45 |
raise RuntimeError('cannot schedule new futures after '
|
| 46 |
RuntimeError: cannot schedule new futures after interpreter shutdown
|
| 47 |
+
2025-04-01 11:33:48,723 INFO MsgRouterThr:516629 [mailbox.py:close():129] Closing mailbox, abandoning 2 handles.
|
wandb/run-20250401_092824-bd2o51v8/files/config.yaml
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.19.8
|
| 4 |
+
m: []
|
| 5 |
+
python_version: 3.11.11
|
| 6 |
+
t:
|
| 7 |
+
"1":
|
| 8 |
+
- 1
|
| 9 |
+
- 5
|
| 10 |
+
- 11
|
| 11 |
+
- 41
|
| 12 |
+
- 49
|
| 13 |
+
- 51
|
| 14 |
+
- 53
|
| 15 |
+
- 55
|
| 16 |
+
- 63
|
| 17 |
+
- 71
|
| 18 |
+
- 83
|
| 19 |
+
- 98
|
| 20 |
+
- 105
|
| 21 |
+
"2":
|
| 22 |
+
- 1
|
| 23 |
+
- 5
|
| 24 |
+
- 11
|
| 25 |
+
- 41
|
| 26 |
+
- 49
|
| 27 |
+
- 51
|
| 28 |
+
- 53
|
| 29 |
+
- 55
|
| 30 |
+
- 63
|
| 31 |
+
- 71
|
| 32 |
+
- 83
|
| 33 |
+
- 98
|
| 34 |
+
- 105
|
| 35 |
+
"3":
|
| 36 |
+
- 2
|
| 37 |
+
- 13
|
| 38 |
+
- 16
|
| 39 |
+
- 23
|
| 40 |
+
- 55
|
| 41 |
+
- 61
|
| 42 |
+
"4": 3.11.11
|
| 43 |
+
"5": 0.19.8
|
| 44 |
+
"6": 4.50.0
|
| 45 |
+
"8":
|
| 46 |
+
- 5
|
| 47 |
+
"12": 0.19.8
|
| 48 |
+
"13": linux-x86_64
|
| 49 |
+
data_cfgs:
|
| 50 |
+
value:
|
| 51 |
+
eval_optional_args: []
|
| 52 |
+
train_datasets: /aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-20
|
| 53 |
+
train_name: text-image-to-text
|
| 54 |
+
train_optional_args: []
|
| 55 |
+
train_split: train
|
| 56 |
+
train_template: AA_TI2T_LLAVA
|
| 57 |
+
logger_cfgs:
|
| 58 |
+
value:
|
| 59 |
+
log_project: align-anything
|
| 60 |
+
log_run_name: dpo
|
| 61 |
+
log_type: wandb
|
| 62 |
+
output_dir: ../outputs/llava_1.6_vicuna_7B_cosi/top1-20
|
| 63 |
+
save_total_limit: 3
|
| 64 |
+
model_cfgs:
|
| 65 |
+
value:
|
| 66 |
+
model_max_length: 4096
|
| 67 |
+
model_name_or_path: /aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf
|
| 68 |
+
trust_remote_code: true
|
| 69 |
+
train_cfgs:
|
| 70 |
+
value:
|
| 71 |
+
adam_betas:
|
| 72 |
+
- 0.9
|
| 73 |
+
- 0.95
|
| 74 |
+
bf16: true
|
| 75 |
+
ds_cfgs: ds_z3_config.json
|
| 76 |
+
epochs: 3
|
| 77 |
+
eval_interval: 10
|
| 78 |
+
eval_strategy: epoch
|
| 79 |
+
fp16: false
|
| 80 |
+
freeze_language_model: false
|
| 81 |
+
freeze_mm_proj: false
|
| 82 |
+
freeze_vision_tower: true
|
| 83 |
+
gradient_accumulation_steps: 1
|
| 84 |
+
gradient_checkpointing: true
|
| 85 |
+
learning_rate: 1e-06
|
| 86 |
+
load_checkpoint: false
|
| 87 |
+
lr_scheduler_type: cosine
|
| 88 |
+
lr_warmup_ratio: 0.03
|
| 89 |
+
per_device_eval_batch_size: 1
|
| 90 |
+
per_device_train_batch_size: 1
|
| 91 |
+
regularization: 0.001
|
| 92 |
+
save_checkpoint: false
|
| 93 |
+
scale_coeff: 0.1
|
| 94 |
+
seed: 42
|
| 95 |
+
weight_decay: 0
|
wandb/run-20250401_092824-bd2o51v8/files/output.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
wandb/run-20250401_092824-bd2o51v8/files/requirements.txt
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
maskrcnn_benchmark==0.0.0
|
| 2 |
+
webdataset==0.2.111
|
| 3 |
+
websockets==15.0.1
|
| 4 |
+
typer==0.15.2
|
| 5 |
+
blobfile==3.0.0
|
| 6 |
+
pooch==1.8.2
|
| 7 |
+
filelock==3.18.0
|
| 8 |
+
referencing==0.36.2
|
| 9 |
+
matplotlib==3.10.1
|
| 10 |
+
cachetools==5.5.2
|
| 11 |
+
python-dateutil==2.9.0.post0
|
| 12 |
+
gmpy2==2.2.1
|
| 13 |
+
uvloop==0.21.0
|
| 14 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 15 |
+
clip==0.2.0
|
| 16 |
+
httpcore==1.0.7
|
| 17 |
+
charset-normalizer==3.3.2
|
| 18 |
+
torchlibrosa==0.1.0
|
| 19 |
+
contourpy==1.3.1
|
| 20 |
+
multiprocess==0.70.16
|
| 21 |
+
nest-asyncio==1.6.0
|
| 22 |
+
Werkzeug==3.1.3
|
| 23 |
+
aiofiles==23.2.1
|
| 24 |
+
six==1.17.0
|
| 25 |
+
torch==2.6.0
|
| 26 |
+
sse-starlette==2.2.1
|
| 27 |
+
typing_extensions==4.12.2
|
| 28 |
+
xgrammar==0.1.16
|
| 29 |
+
psutil==7.0.0
|
| 30 |
+
kiwisolver==1.4.8
|
| 31 |
+
moviepy==2.1.2
|
| 32 |
+
frozenlist==1.5.0
|
| 33 |
+
jiter==0.9.0
|
| 34 |
+
einops==0.8.1
|
| 35 |
+
flash_attn==2.7.4.post1
|
| 36 |
+
PySocks==1.7.1
|
| 37 |
+
regex==2024.11.6
|
| 38 |
+
markdown-it-py==3.0.0
|
| 39 |
+
ruff==0.11.2
|
| 40 |
+
docker-pycreds==0.4.0
|
| 41 |
+
nvidia-nvtx-cu12==12.4.127
|
| 42 |
+
pyparsing==3.2.3
|
| 43 |
+
resampy==0.4.3
|
| 44 |
+
tokenizers==0.21.0
|
| 45 |
+
frechet-audio-distance==0.1.2
|
| 46 |
+
aiohappyeyeballs==2.6.1
|
| 47 |
+
llamafactory==0.9.3.dev0
|
| 48 |
+
msgspec==0.19.0
|
| 49 |
+
httpx==0.28.1
|
| 50 |
+
encodec==0.1.1
|
| 51 |
+
ffmpy==0.5.0
|
| 52 |
+
jsonschema==4.23.0
|
| 53 |
+
imageio-ffmpeg==0.6.0
|
| 54 |
+
mkl_random==1.2.8
|
| 55 |
+
fairscale==0.4.13
|
| 56 |
+
soxr==0.5.0.post1
|
| 57 |
+
lark==1.2.2
|
| 58 |
+
gradio==5.21.0
|
| 59 |
+
absl-py==2.2.1
|
| 60 |
+
dnspython==2.7.0
|
| 61 |
+
networkx==3.4.2
|
| 62 |
+
h5py==3.13.0
|
| 63 |
+
hjson==3.1.0
|
| 64 |
+
tensorboard==2.19.0
|
| 65 |
+
aiosignal==1.3.2
|
| 66 |
+
pip==25.0
|
| 67 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 68 |
+
llguidance==0.7.11
|
| 69 |
+
zipp==3.21.0
|
| 70 |
+
ftfy==6.3.1
|
| 71 |
+
peft==0.15.0
|
| 72 |
+
attrs==25.3.0
|
| 73 |
+
trl==0.9.6
|
| 74 |
+
requests==2.32.3
|
| 75 |
+
progressbar==2.5
|
| 76 |
+
sniffio==1.3.1
|
| 77 |
+
pycountry==24.6.1
|
| 78 |
+
lxml==5.3.1
|
| 79 |
+
starlette==0.46.1
|
| 80 |
+
pytest==7.2.0
|
| 81 |
+
Markdown==3.7
|
| 82 |
+
mdurl==0.1.2
|
| 83 |
+
pyzmq==26.3.0
|
| 84 |
+
safetensors==0.5.3
|
| 85 |
+
opencv-python==4.6.0.66
|
| 86 |
+
prometheus-fastapi-instrumentator==7.1.0
|
| 87 |
+
shellingham==1.5.4
|
| 88 |
+
torchvision==0.21.0
|
| 89 |
+
pluggy==1.5.0
|
| 90 |
+
timm==1.0.15
|
| 91 |
+
multidict==6.2.0
|
| 92 |
+
semantic-version==2.10.0
|
| 93 |
+
airportsdata==20250224
|
| 94 |
+
numba==0.60.0
|
| 95 |
+
MarkupSafe==2.1.5
|
| 96 |
+
pydantic_core==2.33.0
|
| 97 |
+
imageio==2.37.0
|
| 98 |
+
nvidia-nccl-cu12==2.21.5
|
| 99 |
+
dill==0.3.8
|
| 100 |
+
msgpack==1.1.0
|
| 101 |
+
sentry-sdk==2.24.1
|
| 102 |
+
rpds-py==0.24.0
|
| 103 |
+
grpcio==1.71.0
|
| 104 |
+
fastrlock==0.8.3
|
| 105 |
+
python-json-logger==3.3.0
|
| 106 |
+
cffi==1.17.1
|
| 107 |
+
gradio_client==1.7.2
|
| 108 |
+
PyYAML==6.0.2
|
| 109 |
+
tensorboard-data-server==0.7.2
|
| 110 |
+
termcolor==2.5.0
|
| 111 |
+
torchaudio==2.6.0
|
| 112 |
+
triton==3.2.0
|
| 113 |
+
fastapi==0.115.12
|
| 114 |
+
clint==0.5.1
|
| 115 |
+
lazy_loader==0.4
|
| 116 |
+
depyf==0.18.0
|
| 117 |
+
mkl_fft==1.3.11
|
| 118 |
+
annotated-types==0.7.0
|
| 119 |
+
scikit-learn==1.6.1
|
| 120 |
+
wget==3.2
|
| 121 |
+
setuptools==75.8.0
|
| 122 |
+
args==0.1.0
|
| 123 |
+
certifi==2025.1.31
|
| 124 |
+
click==8.1.8
|
| 125 |
+
python-dotenv==1.1.0
|
| 126 |
+
laion_clap==1.1.5
|
| 127 |
+
Pygments==2.19.1
|
| 128 |
+
tomlkit==0.13.2
|
| 129 |
+
idna==3.7
|
| 130 |
+
propcache==0.3.1
|
| 131 |
+
platformdirs==4.3.7
|
| 132 |
+
align-anything==0.0.1.dev0
|
| 133 |
+
ray==2.44.1
|
| 134 |
+
cloudpickle==3.1.1
|
| 135 |
+
deepspeed==0.16.5
|
| 136 |
+
smmap==5.0.2
|
| 137 |
+
distro==1.9.0
|
| 138 |
+
fonttools==4.56.0
|
| 139 |
+
typing-inspection==0.4.0
|
| 140 |
+
braceexpand==0.1.7
|
| 141 |
+
decorator==5.2.1
|
| 142 |
+
diskcache==5.6.3
|
| 143 |
+
yt-dlp==2025.3.27
|
| 144 |
+
shtab==1.7.1
|
| 145 |
+
gguf==0.10.0
|
| 146 |
+
interegular==0.3.3
|
| 147 |
+
compressed-tensors==0.9.2
|
| 148 |
+
pandas==2.2.3
|
| 149 |
+
huggingface-hub==0.29.3
|
| 150 |
+
pyarrow==19.0.1
|
| 151 |
+
lm-format-enforcer==0.10.11
|
| 152 |
+
GitPython==3.1.44
|
| 153 |
+
xxhash==3.5.0
|
| 154 |
+
packaging==24.2
|
| 155 |
+
setproctitle==1.3.5
|
| 156 |
+
llvmlite==0.43.0
|
| 157 |
+
tiktoken==0.9.0
|
| 158 |
+
mpmath==1.3.0
|
| 159 |
+
email_validator==2.2.0
|
| 160 |
+
nvidia-ml-py==12.570.86
|
| 161 |
+
pydantic==2.11.0
|
| 162 |
+
xformers==0.0.29.post2
|
| 163 |
+
httptools==0.6.4
|
| 164 |
+
librosa==0.11.0
|
| 165 |
+
pytorch-fid==0.3.0
|
| 166 |
+
hpsv2==1.2.0
|
| 167 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 168 |
+
iniconfig==2.1.0
|
| 169 |
+
sympy==1.13.1
|
| 170 |
+
safehttpx==0.1.6
|
| 171 |
+
jsonschema-specifications==2024.10.1
|
| 172 |
+
Jinja2==3.1.6
|
| 173 |
+
tyro==0.8.14
|
| 174 |
+
h11==0.14.0
|
| 175 |
+
aiohttp==3.11.14
|
| 176 |
+
diffusers==0.32.2
|
| 177 |
+
tqdm==4.67.1
|
| 178 |
+
blake3==1.0.4
|
| 179 |
+
vllm==0.8.2
|
| 180 |
+
scipy==1.10.1
|
| 181 |
+
audioread==3.0.1
|
| 182 |
+
proglog==0.1.10
|
| 183 |
+
fire==0.7.0
|
| 184 |
+
sentencepiece==0.2.0
|
| 185 |
+
pytz==2025.2
|
| 186 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 187 |
+
nvidia-curand-cu12==10.3.5.147
|
| 188 |
+
numpy==1.26.4
|
| 189 |
+
tzdata==2025.2
|
| 190 |
+
python-multipart==0.0.20
|
| 191 |
+
urllib3==2.3.0
|
| 192 |
+
pycryptodomex==3.22.0
|
| 193 |
+
yarl==1.18.3
|
| 194 |
+
outlines==0.1.11
|
| 195 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 196 |
+
pydub==0.25.1
|
| 197 |
+
mistral_common==1.5.4
|
| 198 |
+
pycparser==2.22
|
| 199 |
+
pytest-split==0.8.0
|
| 200 |
+
datasets==3.4.1
|
| 201 |
+
soundfile==0.13.1
|
| 202 |
+
transformers==4.50.0
|
| 203 |
+
image-reward==1.5
|
| 204 |
+
wcwidth==0.2.13
|
| 205 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 206 |
+
groovy==0.1.2
|
| 207 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 208 |
+
astor==0.8.1
|
| 209 |
+
anyio==4.9.0
|
| 210 |
+
wandb==0.19.8
|
| 211 |
+
joblib==1.4.2
|
| 212 |
+
fsspec==2024.12.0
|
| 213 |
+
accelerate==1.5.2
|
| 214 |
+
py-cpuinfo==9.0.0
|
| 215 |
+
docstring_parser==0.16
|
| 216 |
+
partial-json-parser==0.2.1.1.post5
|
| 217 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 218 |
+
protobuf==3.20.3
|
| 219 |
+
outlines_core==0.1.26
|
| 220 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 221 |
+
cycler==0.12.1
|
| 222 |
+
uvicorn==0.34.0
|
| 223 |
+
orjson==3.10.16
|
| 224 |
+
av==14.2.0
|
| 225 |
+
Brotli==1.0.9
|
| 226 |
+
cupy-cuda12x==13.4.1
|
| 227 |
+
openai==1.69.0
|
| 228 |
+
rich==13.9.4
|
| 229 |
+
importlib_metadata==8.6.1
|
| 230 |
+
ninja==1.11.1.4
|
| 231 |
+
wheel==0.45.1
|
| 232 |
+
pillow==10.4.0
|
| 233 |
+
prometheus_client==0.21.1
|
| 234 |
+
threadpoolctl==3.6.0
|
| 235 |
+
gitdb==4.0.12
|
| 236 |
+
watchfiles==1.0.4
|
| 237 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 238 |
+
opencv-python-headless==4.11.0.86
|
| 239 |
+
mkl-service==2.4.0
|
| 240 |
+
rich-toolkit==0.14.0
|
| 241 |
+
fastapi-cli==0.0.7
|
| 242 |
+
llamafactory==0.9.3.dev0
|
| 243 |
+
typing_extensions==4.12.2
|
| 244 |
+
tomli==2.0.1
|
| 245 |
+
zipp==3.19.2
|
| 246 |
+
wheel==0.43.0
|
| 247 |
+
jaraco.text==3.12.1
|
| 248 |
+
packaging==24.2
|
| 249 |
+
autocommand==2.2.2
|
| 250 |
+
jaraco.functools==4.0.1
|
| 251 |
+
jaraco.collections==5.1.0
|
| 252 |
+
platformdirs==4.2.2
|
| 253 |
+
more-itertools==10.3.0
|
| 254 |
+
inflect==7.3.1
|
| 255 |
+
jaraco.context==5.3.0
|
| 256 |
+
typeguard==4.3.0
|
| 257 |
+
backports.tarfile==1.2.0
|
| 258 |
+
importlib_metadata==8.0.0
|
wandb/run-20250401_092824-bd2o51v8/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.15.0-1040-nvidia-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.11.11",
|
| 4 |
+
"startedAt": "2025-04-01T01:28:24.078287Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--local_rank=0",
|
| 7 |
+
"--model_name_or_path",
|
| 8 |
+
"/aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf",
|
| 9 |
+
"--train_datasets",
|
| 10 |
+
"/aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-20",
|
| 11 |
+
"--train_template",
|
| 12 |
+
"AA_TI2T_LLAVA",
|
| 13 |
+
"--train_split",
|
| 14 |
+
"train",
|
| 15 |
+
"--train_name",
|
| 16 |
+
"text-image-to-text",
|
| 17 |
+
"--output_dir",
|
| 18 |
+
"../outputs/llava_1.6_vicuna_7B_cosi/top1-20",
|
| 19 |
+
"--save_total_limit",
|
| 20 |
+
"3",
|
| 21 |
+
"--train_batch_size",
|
| 22 |
+
"8",
|
| 23 |
+
"--epochs",
|
| 24 |
+
"3"
|
| 25 |
+
],
|
| 26 |
+
"program": "-m align_anything.trainers.text_image_to_text.dpo",
|
| 27 |
+
"git": {
|
| 28 |
+
"remote": "git@github.com-hantao:PKU-Alignment/align-anything.git",
|
| 29 |
+
"commit": "106588f9802757a3283c1aff1f33ea9afd737f31"
|
| 30 |
+
},
|
| 31 |
+
"email": "2200017789@stu.pku.edu.cn",
|
| 32 |
+
"root": "../outputs/llava_1.6_vicuna_7B_cosi/top1-20",
|
| 33 |
+
"host": "dgx-075",
|
| 34 |
+
"executable": "/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/python",
|
| 35 |
+
"cpu_count": 112,
|
| 36 |
+
"cpu_count_logical": 224,
|
| 37 |
+
"gpu": "NVIDIA H800",
|
| 38 |
+
"gpu_count": 8,
|
| 39 |
+
"disk": {
|
| 40 |
+
"/": {
|
| 41 |
+
"total": "1888556142592",
|
| 42 |
+
"used": "44068241408"
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
"memory": {
|
| 46 |
+
"total": "2164195545088"
|
| 47 |
+
},
|
| 48 |
+
"cpu": {
|
| 49 |
+
"count": 112,
|
| 50 |
+
"countLogical": 224
|
| 51 |
+
},
|
| 52 |
+
"gpu_nvidia": [
|
| 53 |
+
{
|
| 54 |
+
"name": "NVIDIA H800",
|
| 55 |
+
"memoryTotal": "85520809984",
|
| 56 |
+
"cudaCores": 16896,
|
| 57 |
+
"architecture": "Hopper"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "NVIDIA H800",
|
| 61 |
+
"memoryTotal": "85520809984",
|
| 62 |
+
"cudaCores": 16896,
|
| 63 |
+
"architecture": "Hopper"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "NVIDIA H800",
|
| 67 |
+
"memoryTotal": "85520809984",
|
| 68 |
+
"cudaCores": 16896,
|
| 69 |
+
"architecture": "Hopper"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA H800",
|
| 73 |
+
"memoryTotal": "85520809984",
|
| 74 |
+
"cudaCores": 16896,
|
| 75 |
+
"architecture": "Hopper"
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA H800",
|
| 79 |
+
"memoryTotal": "85520809984",
|
| 80 |
+
"cudaCores": 16896,
|
| 81 |
+
"architecture": "Hopper"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA H800",
|
| 85 |
+
"memoryTotal": "85520809984",
|
| 86 |
+
"cudaCores": 16896,
|
| 87 |
+
"architecture": "Hopper"
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "NVIDIA H800",
|
| 91 |
+
"memoryTotal": "85520809984",
|
| 92 |
+
"cudaCores": 16896,
|
| 93 |
+
"architecture": "Hopper"
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"name": "NVIDIA H800",
|
| 97 |
+
"memoryTotal": "85520809984",
|
| 98 |
+
"cudaCores": 16896,
|
| 99 |
+
"architecture": "Hopper"
|
| 100 |
+
}
|
| 101 |
+
],
|
| 102 |
+
"slurm": {
|
| 103 |
+
"conf": "/cm/shared/apps/slurm/var/etc/slurm/slurm.conf"
|
| 104 |
+
},
|
| 105 |
+
"cudaVersion": "12.2"
|
| 106 |
+
}
|
wandb/run-20250401_092824-bd2o51v8/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"train/reward":-14.6875,"train/better_sample_reward":-2.265625,"train/lr":0,"_timestamp":1.7434783858305552e+09,"train/step":2919,"train/worse_sample_reward":-12.4375,"train/reward_margin":10.1875,"_runtime":7523.179692948,"_wandb":{"runtime":7523},"train/loss":0.00011682510375976562,"_step":2919,"train/epoch":3,"train/reward_accuracy":1}
|
wandb/run-20250401_092824-bd2o51v8/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-04-01T09:28:23.49254425+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpk169qwgv/port-516629.txt","pid":516629,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
| 2 |
+
{"time":"2025-04-01T09:28:23.493893758+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":516629}
|
| 3 |
+
{"time":"2025-04-01T09:28:23.493865904+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38869,"Zone":""}}
|
| 4 |
+
{"time":"2025-04-01T09:28:23.644131169+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51766"}
|
| 5 |
+
{"time":"2025-04-01T09:28:24.079737159+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"bd2o51v8","id":"127.0.0.1:51766"}
|
| 6 |
+
{"time":"2025-04-01T09:28:24.302760102+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"bd2o51v8","id":"127.0.0.1:51766"}
|
| 7 |
+
{"time":"2025-04-01T11:33:48.724033921+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51766"}
|
| 8 |
+
{"time":"2025-04-01T11:33:48.724433364+08:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51766"}
|
| 9 |
+
{"time":"2025-04-01T11:33:48.724466682+08:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51766"}
|
| 10 |
+
{"time":"2025-04-01T11:33:48.724456672+08:00","level":"INFO","msg":"server is shutting down"}
|
| 11 |
+
{"time":"2025-04-01T11:33:48.895059085+08:00","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:38869->127.0.0.1:51766: use of closed network connection","id":"127.0.0.1:51766"}
|
| 12 |
+
{"time":"2025-04-01T11:33:48.895255575+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51766"}
|
| 13 |
+
{"time":"2025-04-01T11:33:48.895267229+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51766"}
|
| 14 |
+
{"time":"2025-04-01T11:33:48.895272478+08:00","level":"INFO","msg":"server is closed"}
|
wandb/run-20250401_092824-bd2o51v8/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-04-01T09:28:24.081265347+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"../outputs/llava_1.6_vicuna_7B_cosi/top1-20/wandb/run-20250401_092824-bd2o51v8/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-04-01T09:28:24.302680251+08:00","level":"INFO","msg":"created new stream","id":"bd2o51v8"}
|
| 3 |
+
{"time":"2025-04-01T09:28:24.302754002+08:00","level":"INFO","msg":"stream: started","id":"bd2o51v8"}
|
| 4 |
+
{"time":"2025-04-01T09:28:24.302817732+08:00","level":"INFO","msg":"handler: started","stream_id":"bd2o51v8"}
|
| 5 |
+
{"time":"2025-04-01T09:28:24.302840797+08:00","level":"INFO","msg":"sender: started","stream_id":"bd2o51v8"}
|
| 6 |
+
{"time":"2025-04-01T09:28:24.302883549+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"bd2o51v8"}
|
| 7 |
+
{"time":"2025-04-01T09:28:24.717791905+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-04-01T11:33:47.258061087+08:00","level":"INFO","msg":"Stopping system monitor"}
|
| 9 |
+
{"time":"2025-04-01T11:33:47.265278587+08:00","level":"INFO","msg":"Stopped system monitor"}
|
| 10 |
+
{"time":"2025-04-01T11:33:48.674037127+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 11 |
+
{"time":"2025-04-01T11:33:48.72438635+08:00","level":"INFO","msg":"stream: closing","id":"bd2o51v8"}
|
| 12 |
+
{"time":"2025-04-01T11:33:48.724423251+08:00","level":"WARN","msg":"sender: received Exit record more than once, ignoring"}
|
| 13 |
+
{"time":"2025-04-01T11:33:48.894912192+08:00","level":"INFO","msg":"handler: closed","stream_id":"bd2o51v8"}
|
| 14 |
+
{"time":"2025-04-01T11:33:48.89495363+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"bd2o51v8"}
|
| 15 |
+
{"time":"2025-04-01T11:33:48.895096718+08:00","level":"INFO","msg":"sender: closed","stream_id":"bd2o51v8"}
|
| 16 |
+
{"time":"2025-04-01T11:33:48.895192408+08:00","level":"INFO","msg":"stream: closed","id":"bd2o51v8"}
|