htlou committed on
Commit
ad4483e
·
verified ·
1 Parent(s): 615587d

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. arguments.yaml +11 -13
  3. environ.txt +32 -63
  4. script.sh +28 -26
  5. slice_1946/added_tokens.json +3 -0
  6. slice_1946/chat_template.json +3 -0
  7. slice_1946/config.json +89 -0
  8. slice_1946/preprocessor_config.json +52 -0
  9. slice_1946/processor_config.json +7 -0
  10. slice_1946/pytorch_model.bin +3 -0
  11. slice_1946/special_tokens_map.json +31 -0
  12. slice_1946/tokenizer.json +0 -0
  13. slice_1946/tokenizer.model +3 -0
  14. slice_1946/tokenizer_config.json +56 -0
  15. slice_2919/added_tokens.json +3 -0
  16. slice_2919/chat_template.json +3 -0
  17. slice_2919/config.json +89 -0
  18. slice_2919/preprocessor_config.json +52 -0
  19. slice_2919/processor_config.json +7 -0
  20. slice_2919/pytorch_model.bin +3 -0
  21. slice_2919/special_tokens_map.json +31 -0
  22. slice_2919/tokenizer.json +0 -0
  23. slice_2919/tokenizer.model +3 -0
  24. slice_2919/tokenizer_config.json +56 -0
  25. slice_973/added_tokens.json +3 -0
  26. slice_973/chat_template.json +3 -0
  27. slice_973/config.json +89 -0
  28. slice_973/preprocessor_config.json +52 -0
  29. slice_973/processor_config.json +7 -0
  30. slice_973/pytorch_model.bin +3 -0
  31. slice_973/special_tokens_map.json +31 -0
  32. slice_973/tokenizer.json +0 -0
  33. slice_973/tokenizer.model +3 -0
  34. slice_973/tokenizer_config.json +56 -0
  35. slice_end/added_tokens.json +1 -2
  36. slice_end/config.json +13 -9
  37. slice_end/pytorch_model.bin +2 -2
  38. slice_end/special_tokens_map.json +1 -1
  39. slice_end/tokenizer.json +0 -0
  40. slice_end/tokenizer.model +2 -2
  41. slice_end/tokenizer_config.json +5 -18
  42. wandb/debug-internal.log +16 -18
  43. wandb/debug.log +30 -30
  44. wandb/run-20250401_092824-bd2o51v8/files/config.yaml +95 -0
  45. wandb/run-20250401_092824-bd2o51v8/files/output.log +0 -0
  46. wandb/run-20250401_092824-bd2o51v8/files/requirements.txt +258 -0
  47. wandb/run-20250401_092824-bd2o51v8/files/wandb-metadata.json +106 -0
  48. wandb/run-20250401_092824-bd2o51v8/files/wandb-summary.json +1 -0
  49. wandb/run-20250401_092824-bd2o51v8/logs/debug-core.log +14 -0
  50. wandb/run-20250401_092824-bd2o51v8/logs/debug-internal.log +16 -0
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  wandb/run-20250331_174515-0lcg01si/run-0lcg01si.wandb filter=lfs diff=lfs merge=lfs -text
37
  wandb/run-20250329_013436-woy6qs03/run-woy6qs03.wandb filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  wandb/run-20250331_174515-0lcg01si/run-0lcg01si.wandb filter=lfs diff=lfs merge=lfs -text
37
  wandb/run-20250329_013436-woy6qs03/run-woy6qs03.wandb filter=lfs diff=lfs merge=lfs -text
38
+ wandb/run-20250401_092824-bd2o51v8/run-bd2o51v8.wandb filter=lfs diff=lfs merge=lfs -text
arguments.yaml CHANGED
@@ -1,37 +1,34 @@
1
  data_cfgs:
2
  eval_data_files: {}
3
  eval_datasets: {}
4
- eval_name: {}
5
  eval_optional_args: []
6
  eval_size: {}
7
  eval_split: {}
8
  eval_subset: {}
9
  eval_template: {}
10
- load_multi_datasets: false
11
  train_data_files: {}
12
- train_datasets: /aifs4su/yaodong/hantao/datasets/MMInstruct-GPT4V_mistral-7b_cosi_cut/merged/top1-20
13
  train_name: text-image-to-text
14
  train_optional_args: []
15
  train_size: {}
16
  train_split: train
17
- train_template: MM_TI2T_LLAVA
18
  logger_cfgs:
19
  cache_dir: {}
20
  log_project: align-anything
21
- log_run_name: sft
22
  log_type: wandb
23
- output_dir: ../outputs/LLAVA_7B_cosi/top1-20
24
- save_total_limit: 6
25
  model_cfgs:
26
- model_max_length: 2048
27
- model_name_or_path: /aifs4su/yaodong/hantao/models/llava-v1.6-mistral-7b-hf
28
  trust_remote_code: true
29
  special_tokens: {}
30
  train_cfgs:
31
  adam_betas:
32
  - 0.9
33
  - 0.95
34
- adam_epsilon: 1.0e-08
35
  bf16: true
36
  ds_cfgs: ds_z3_config.json
37
  epochs: 3
@@ -41,15 +38,16 @@ train_cfgs:
41
  freeze_language_model: false
42
  freeze_mm_proj: false
43
  freeze_vision_tower: true
44
- gradient_accumulation_steps: 16
45
  gradient_checkpointing: true
46
- learning_rate: 2.0e-05
47
  load_checkpoint: false
48
  lr_scheduler_type: cosine
49
  lr_warmup_ratio: 0.03
50
- max_grad_norm: 1.0
51
  per_device_eval_batch_size: 1
52
  per_device_train_batch_size: 1
 
53
  save_checkpoint: false
 
54
  seed: 42
55
  weight_decay: 0.0
 
1
  data_cfgs:
2
  eval_data_files: {}
3
  eval_datasets: {}
 
4
  eval_optional_args: []
5
  eval_size: {}
6
  eval_split: {}
7
  eval_subset: {}
8
  eval_template: {}
 
9
  train_data_files: {}
10
+ train_datasets: /aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-20
11
  train_name: text-image-to-text
12
  train_optional_args: []
13
  train_size: {}
14
  train_split: train
15
+ train_template: AA_TI2T_LLAVA
16
  logger_cfgs:
17
  cache_dir: {}
18
  log_project: align-anything
19
+ log_run_name: dpo
20
  log_type: wandb
21
+ output_dir: ../outputs/llava_1.6_vicuna_7B_cosi/top1-20
22
+ save_total_limit: 3
23
  model_cfgs:
24
+ model_max_length: 4096
25
+ model_name_or_path: /aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf
26
  trust_remote_code: true
27
  special_tokens: {}
28
  train_cfgs:
29
  adam_betas:
30
  - 0.9
31
  - 0.95
 
32
  bf16: true
33
  ds_cfgs: ds_z3_config.json
34
  epochs: 3
 
38
  freeze_language_model: false
39
  freeze_mm_proj: false
40
  freeze_vision_tower: true
41
+ gradient_accumulation_steps: 1
42
  gradient_checkpointing: true
43
+ learning_rate: 1.0e-06
44
  load_checkpoint: false
45
  lr_scheduler_type: cosine
46
  lr_warmup_ratio: 0.03
 
47
  per_device_eval_batch_size: 1
48
  per_device_train_batch_size: 1
49
+ regularization: 0.001
50
  save_checkpoint: false
51
+ scale_coeff: 0.1
52
  seed: 42
53
  weight_decay: 0.0
environ.txt CHANGED
@@ -81,84 +81,48 @@ BASH_FUNC_switchml%%=() { typeset swfound=1;
81
  return 1;
82
  fi
83
  }
 
84
  BUILD=x86_64-conda-linux-gnu
 
85
  CC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
86
  CC_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
87
- CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
88
  CMAKE_ARGS=-DCMAKE_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ar -DCMAKE_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib -DCMAKE_LINKER=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld -DCMAKE_STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip -DCMAKE_BUILD_TYPE=Release
89
  CMAKE_PREFIX_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama:/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot/usr
90
  CMD_WLM_CLUSTER_NAME=slurm
91
- CONDA_BACKUP_ADDR2LINE=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-addr2line
92
- CONDA_BACKUP_AR=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-ar
93
- CONDA_BACKUP_AS=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-as
94
- CONDA_BACKUP_BUILD=x86_64-conda-linux-gnu
95
- CONDA_BACKUP_CC=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-cc
96
- CONDA_BACKUP_CC_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-cc
97
- CONDA_BACKUP_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
98
- CONDA_BACKUP_CMAKE_PREFIX_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_anything:/aifs4su/yaodong/miniconda3/envs/hantao_anything/x86_64-conda-linux-gnu/sysroot/usr
99
- CONDA_BACKUP_CONDA_BUILD_SYSROOT=/aifs4su/yaodong/miniconda3/envs/hantao_anything/x86_64-conda-linux-gnu/sysroot
100
- CONDA_BACKUP_CPP=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-cpp
101
- CONDA_BACKUP_CPPFLAGS=-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
102
- CONDA_BACKUP_CXX=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-c++
103
- CONDA_BACKUP_CXXFILT=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-c++filt
104
- CONDA_BACKUP_CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
105
- CONDA_BACKUP_CXX_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-c++
106
- CONDA_BACKUP_DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
107
- CONDA_BACKUP_DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
108
- CONDA_BACKUP_DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
109
- CONDA_BACKUP_ELFEDIT=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-elfedit
110
- CONDA_BACKUP_GCC=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-gcc
111
- CONDA_BACKUP_GCC_AR=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-gcc-ar
112
- CONDA_BACKUP_GCC_NM=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-gcc-nm
113
- CONDA_BACKUP_GCC_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-gcc-ranlib
114
- CONDA_BACKUP_GPROF=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-gprof
115
- CONDA_BACKUP_GXX=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-g++
116
- CONDA_BACKUP_HOST=x86_64-conda-linux-gnu
117
- CONDA_BACKUP_LD=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-ld
118
- CONDA_BACKUP_LDFLAGS=-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,-rpath,/aifs4su/yaodong/miniconda3/envs/hantao_anything/lib -Wl,-rpath-link,/aifs4su/yaodong/miniconda3/envs/hantao_anything/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_anything/lib
119
- CONDA_BACKUP_LD_GOLD=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-ld.gold
120
- CONDA_BACKUP_NM=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-nm
121
- CONDA_BACKUP_OBJCOPY=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-objcopy
122
- CONDA_BACKUP_OBJDUMP=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-objdump
123
- CONDA_BACKUP_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-ranlib
124
- CONDA_BACKUP_READELF=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-readelf
125
- CONDA_BACKUP_SIZE=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-size
126
- CONDA_BACKUP_STRINGS=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-strings
127
- CONDA_BACKUP_STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_anything/bin/x86_64-conda-linux-gnu-strip
128
- CONDA_BACKUP__CONDA_PYTHON_SYSCONFIGDATA_NAME=_sysconfigdata_x86_64_conda_cos7_linux_gnu
129
- CONDA_BACKUP_build_alias=x86_64-conda-linux-gnu
130
- CONDA_BACKUP_host_alias=x86_64-conda-linux-gnu
131
  CONDA_BUILD_SYSROOT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot
132
  CONDA_DEFAULT_ENV=hantao_llama
133
  CONDA_EXE=/aifs4su/yaodong/miniconda3/bin/conda
134
  CONDA_PREFIX=/aifs4su/yaodong/miniconda3/envs/hantao_llama
135
  CONDA_PREFIX_1=/aifs4su/yaodong/miniconda3
136
- CONDA_PREFIX_2=/aifs4su/yaodong/miniconda3/envs/hantao_anything
137
  CONDA_PROMPT_MODIFIER=(hantao_llama)
138
  CONDA_PYTHON_EXE=/aifs4su/yaodong/miniconda3/bin/python
139
- CONDA_SHLVL=3
140
  CPATH=/cm/shared/apps/slurm/current/include
141
  CPATH_modshare=/cm/shared/apps/slurm/current/include:1
142
  CPP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cpp
143
- CPPFLAGS=-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
144
  CROSS_RANK=0
145
  CROSS_SIZE=1
146
  CUDA_MODULE_LOADING=LAZY
147
  CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
148
  CXX=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++
149
  CXXFILT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++filt
150
- CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
151
  CXX_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++
152
  DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1028/bus
153
- DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
154
- DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
155
- DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_anything/include
 
156
  ELFEDIT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-elfedit
157
  ENABLE_LMOD=0
158
  GCC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc
159
  GCC_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ar
160
  GCC_NM=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-nm
161
  GCC_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ranlib
 
162
  GPROF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gprof
163
  GSETTINGS_SCHEMA_DIR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/share/glib-2.0/schemas
164
  GSETTINGS_SCHEMA_DIR_CONDA_BACKUP=
@@ -172,7 +136,7 @@ KMP_DUPLICATE_LIB_OK=True
172
  KMP_INIT_AT_FORK=FALSE
173
  LANG=C.UTF-8
174
  LD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld
175
- LDFLAGS=-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,-rpath,/aifs4su/yaodong/miniconda3/envs/hantao_anything/lib -Wl,-rpath-link,/aifs4su/yaodong/miniconda3/envs/hantao_anything/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_anything/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
176
  LD_GOLD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld.gold
177
  LD_LIBRARY_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/cv2/../../lib64:/usr/mpi/gcc/openmpi-4.1.7a1/lib:/cm/shared/apps/slurm/current/lib64/slurm:/cm/shared/apps/slurm/current/lib64
178
  LD_LIBRARY_PATH_modshare=/cm/shared/apps/slurm/current/lib64:1:/usr/mpi/gcc/openmpi-4.1.7a1/lib:1:/cm/shared/apps/slurm/current/lib64/slurm:1
@@ -192,7 +156,7 @@ LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd
192
  MANPATH=/usr/mpi/gcc/openmpi-4.1.7a1/share/man:/cm/shared/apps/slurm/current/man:/cm/local/apps/environment-modules/4.5.3/share/man:/usr/local/man:/usr/local/share/man:/usr/share/man:/cm/local/apps/environment-modules/current/share/man:/cm/local/apps/environment-modules/current/share/man
193
  MANPATH_modshare=/usr/local/share/man:1:/usr/mpi/gcc/openmpi-4.1.7a1/share/man:1:/cm/local/apps/environment-modules/current/share/man:1:/cm/local/apps/environment-modules/4.5.3/share/man:1:/usr/local/man:1:/usr/share/man:1:/cm/shared/apps/slurm/current/man:1
194
  MASTER_ADDR=127.0.0.1
195
- MASTER_PORT=55074
196
  MIG_PARTED_CHECKPOINT_FILE=/var/lib/nvidia-mig-manager/checkpoint.json
197
  MIG_PARTED_CONFIG_FILE=/etc/nvidia-mig-manager/config.yaml
198
  MIG_PARTED_HOOKS_FILE=/etc/nvidia-mig-manager/hooks.yaml
@@ -209,39 +173,44 @@ NVCC_PREPEND_FLAGS_BACKUP= -ccbin=/aifs4su/yaodong/miniconda3/bin/x86_64-conda-l
209
  NVITOP_MONITOR_MODE=colorful
210
  OBJCOPY=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objcopy
211
  OBJDUMP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objdump
212
- OLDPWD=/aifs4su/yaodong/hantao/LLaMA-Factory
213
- PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin:/aifs4su/yaodong/miniconda3/condabin:/usr/mpi/gcc/openmpi-4.1.7a1/bin:/usr/lpp/mmfs/bin:/usr/local/cuda/bin:/opt/bin:/usr/lpp/mmfs/bin:/cm/shared/apps/slurm/current/sbin:/cm/shared/apps/slurm/current/bin:/usr/local/cuda/bin:/opt/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/sbin:/usr/sbin:/cm/local/apps/environment-modules/4.5.3/bin
214
- PATH_modshare=/usr/mpi/gcc/openmpi-4.1.7a1/bin:1:/opt/bin/:1:/usr/bin:1:/usr/local/bin:1:/cm/shared/apps/slurm/current/bin:1:/cm/shared/apps/slurm/current/sbin:1:/bin:1:/snap/bin:1:/sbin:1:/usr/sbin:1:/cm/local/apps/environment-modules/4.5.3/bin:1:/usr/games:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/usr/local/cuda/bin:1:/usr/local/games:1
215
  PWD=/aifs4su/yaodong/hantao/align-anything/scripts
 
216
  PYTHONHASHSEED=42
217
  PYTHONPATH=/aifs4su/yaodong/hantao/align-anything/scripts
218
- QT_QPA_FONTDIR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/cv2/qt/fonts
219
- QT_QPA_PLATFORM_PLUGIN_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/cv2/qt/plugins
220
  RANK=0
221
  RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib
222
  READELF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-readelf
223
  SHELL=/bin/bash
224
- SHLVL=3
225
  SIZE=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-size
226
  SLURM_CONF=/cm/shared/apps/slurm/var/etc/slurm/slurm.conf
227
- SSH_CLIENT=10.33.4.229 44942 22
228
- SSH_CONNECTION=10.33.4.51 48576 10.33.4.230 22
229
- SSH_TTY=/dev/pts/2
 
230
  STRINGS=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strings
231
  STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip
232
  TERM=screen
233
  TERM_PROGRAM=tmux
234
  TERM_PROGRAM_VERSION=3.2a
235
- TMUX=/tmp/tmux-1028/default,2764504,18
236
- TMUX_PANE=%18
237
  USER=yangyaodong
 
 
 
 
 
238
  WANDB_API_KEY=7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33
239
- WANDB_SERVICE=2-125374-tcp-localhost-39903
240
  WORLD_SIZE=8
241
  XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
242
  XDG_RUNTIME_DIR=/run/user/1028
243
  XDG_SESSION_CLASS=user
244
- XDG_SESSION_ID=63754
245
  XDG_SESSION_TYPE=tty
246
  ZERO_STAGE=3
247
  _=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/deepspeed
 
81
  return 1;
82
  fi
83
  }
84
+ BROWSER=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/bin/helpers/browser.sh
85
  BUILD=x86_64-conda-linux-gnu
86
+ BUNDLED_DEBUGPY_PATH=/home/yangyaodong/.vscode-server/extensions/ms-python.debugpy-2025.0.1-linux-x64/bundled/libs/debugpy
87
  CC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
88
  CC_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cc
89
+ CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
90
  CMAKE_ARGS=-DCMAKE_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ar -DCMAKE_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib -DCMAKE_LINKER=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld -DCMAKE_STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip -DCMAKE_BUILD_TYPE=Release
91
  CMAKE_PREFIX_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama:/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot/usr
92
  CMD_WLM_CLUSTER_NAME=slurm
93
+ COLORTERM=truecolor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  CONDA_BUILD_SYSROOT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/x86_64-conda-linux-gnu/sysroot
95
  CONDA_DEFAULT_ENV=hantao_llama
96
  CONDA_EXE=/aifs4su/yaodong/miniconda3/bin/conda
97
  CONDA_PREFIX=/aifs4su/yaodong/miniconda3/envs/hantao_llama
98
  CONDA_PREFIX_1=/aifs4su/yaodong/miniconda3
 
99
  CONDA_PROMPT_MODIFIER=(hantao_llama)
100
  CONDA_PYTHON_EXE=/aifs4su/yaodong/miniconda3/bin/python
101
+ CONDA_SHLVL=2
102
  CPATH=/cm/shared/apps/slurm/current/include
103
  CPATH_modshare=/cm/shared/apps/slurm/current/include:1
104
  CPP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-cpp
105
+ CPPFLAGS=-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
106
  CROSS_RANK=0
107
  CROSS_SIZE=1
108
  CUDA_MODULE_LOADING=LAZY
109
  CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
110
  CXX=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++
111
  CXXFILT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++filt
112
+ CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include -I/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/include -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
113
  CXX_FOR_BUILD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-c++
114
  DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1028/bus
115
+ DEBUGPY_ADAPTER_ENDPOINTS=/home/yangyaodong/.vscode-server/extensions/ms-python.debugpy-2025.0.1-linux-x64/.noConfigDebugAdapterEndpoints/endpoint-cf2a8fd1c0b5bb2d.txt
116
+ DEBUG_CFLAGS=-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
117
+ DEBUG_CPPFLAGS=-D_DEBUG -D_FORTIFY_SOURCE=2 -Og -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
118
+ DEBUG_CXXFLAGS=-fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-all -fno-plt -Og -g -Wall -Wextra -fvar-tracking-assignments -ffunction-sections -pipe -isystem /aifs4su/yaodong/miniconda3/envs/hantao_llama/include
119
  ELFEDIT=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-elfedit
120
  ENABLE_LMOD=0
121
  GCC=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc
122
  GCC_AR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ar
123
  GCC_NM=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-nm
124
  GCC_RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gcc-ranlib
125
+ GIT_ASKPASS=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/extensions/git/dist/askpass.sh
126
  GPROF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-gprof
127
  GSETTINGS_SCHEMA_DIR=/aifs4su/yaodong/miniconda3/envs/hantao_llama/share/glib-2.0/schemas
128
  GSETTINGS_SCHEMA_DIR_CONDA_BACKUP=
 
136
  KMP_INIT_AT_FORK=FALSE
137
  LANG=C.UTF-8
138
  LD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld
139
+ LDFLAGS=-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,-rpath,/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib -Wl,-rpath-link,/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib -L/aifs4su/yaodong/miniconda3/envs/hantao_llama/targets/x86_64-linux/lib/stubs
140
  LD_GOLD=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ld.gold
141
  LD_LIBRARY_PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/cv2/../../lib64:/usr/mpi/gcc/openmpi-4.1.7a1/lib:/cm/shared/apps/slurm/current/lib64/slurm:/cm/shared/apps/slurm/current/lib64
142
  LD_LIBRARY_PATH_modshare=/cm/shared/apps/slurm/current/lib64:1:/usr/mpi/gcc/openmpi-4.1.7a1/lib:1:/cm/shared/apps/slurm/current/lib64/slurm:1
 
156
  MANPATH=/usr/mpi/gcc/openmpi-4.1.7a1/share/man:/cm/shared/apps/slurm/current/man:/cm/local/apps/environment-modules/4.5.3/share/man:/usr/local/man:/usr/local/share/man:/usr/share/man:/cm/local/apps/environment-modules/current/share/man:/cm/local/apps/environment-modules/current/share/man
157
  MANPATH_modshare=/usr/local/share/man:1:/usr/mpi/gcc/openmpi-4.1.7a1/share/man:1:/cm/local/apps/environment-modules/current/share/man:1:/cm/local/apps/environment-modules/4.5.3/share/man:1:/usr/local/man:1:/usr/share/man:1:/cm/shared/apps/slurm/current/man:1
158
  MASTER_ADDR=127.0.0.1
159
+ MASTER_PORT=17196
160
  MIG_PARTED_CHECKPOINT_FILE=/var/lib/nvidia-mig-manager/checkpoint.json
161
  MIG_PARTED_CONFIG_FILE=/etc/nvidia-mig-manager/config.yaml
162
  MIG_PARTED_HOOKS_FILE=/etc/nvidia-mig-manager/hooks.yaml
 
173
  NVITOP_MONITOR_MODE=colorful
174
  OBJCOPY=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objcopy
175
  OBJDUMP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-objdump
176
+ OLDPWD=/home/yangyaodong
177
+ PATH=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin:/usr/lpp/mmfs/bin:/usr/local/cuda/bin:/opt/bin:/usr/lpp/mmfs/bin:/cm/shared/apps/slurm/current/sbin:/cm/shared/apps/slurm/current/bin:/usr/local/cuda/bin:/opt/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/sbin:/usr/sbin:/cm/local/apps/environment-modules/4.5.3/bin
178
+ PATH_modshare=/usr/mpi/gcc/openmpi-4.1.7a1/bin:1:/opt/bin/:1:/usr/bin:1:/usr/local/bin:1:/cm/shared/apps/slurm/current/bin:1:/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/bin/remote-cli:1:/cm/shared/apps/slurm/current/sbin:1:/bin:1:/snap/bin:1:/sbin:1:/home/yangyaodong/.vscode-server/data/User/globalStorage/github.copilot-chat/debugCommand:1:/home/yangyaodong/.vscode-server/extensions/ms-python.debugpy-2025.0.1-linux-x64/bundled/scripts/noConfigScripts:1:/usr/sbin:1:/usr/games:1:/cm/local/apps/environment-modules/4.5.3/bin:1:/usr/local/sbin:1:/usr/lpp/mmfs/bin:1:/usr/local/cuda/bin:1:/usr/local/games:1
179
  PWD=/aifs4su/yaodong/hantao/align-anything/scripts
180
+ PYDEVD_DISABLE_FILE_VALIDATION=1
181
  PYTHONHASHSEED=42
182
  PYTHONPATH=/aifs4su/yaodong/hantao/align-anything/scripts
 
 
183
  RANK=0
184
  RANLIB=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-ranlib
185
  READELF=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-readelf
186
  SHELL=/bin/bash
187
+ SHLVL=4
188
  SIZE=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-size
189
  SLURM_CONF=/cm/shared/apps/slurm/var/etc/slurm/slurm.conf
190
+ SSH_CLIENT=10.33.4.51 46666 22
191
+ SSH_CONNECTION=10.33.4.230 40638 10.33.4.213 22
192
+ SSL_CERT_DIR=/usr/lib/ssl/certs
193
+ SSL_CERT_FILE=/usr/lib/ssl/certs/ca-certificates.crt
194
  STRINGS=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strings
195
  STRIP=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/x86_64-conda-linux-gnu-strip
196
  TERM=screen
197
  TERM_PROGRAM=tmux
198
  TERM_PROGRAM_VERSION=3.2a
199
+ TMUX=/tmp/tmux-1028/default,2296743,10
200
+ TMUX_PANE=%25
201
  USER=yangyaodong
202
+ VSCODE_GIT_ASKPASS_EXTRA_ARGS=
203
+ VSCODE_GIT_ASKPASS_MAIN=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/extensions/git/dist/askpass-main.js
204
+ VSCODE_GIT_ASKPASS_NODE=/home/yangyaodong/.vscode-server/cli/servers/Stable-e54c774e0add60467559eb0d1e229c6452cf8447/server/node
205
+ VSCODE_GIT_IPC_HANDLE=/run/user/1028/vscode-git-bbbbf321f6.sock
206
+ VSCODE_IPC_HOOK_CLI=/run/user/1028/vscode-ipc-e2edf668-dca9-4331-a6ac-7d4507f653ce.sock
207
  WANDB_API_KEY=7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33
208
+ WANDB_SERVICE=2-516629-tcp-localhost-38869
209
  WORLD_SIZE=8
210
  XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
211
  XDG_RUNTIME_DIR=/run/user/1028
212
  XDG_SESSION_CLASS=user
213
+ XDG_SESSION_ID=43255
214
  XDG_SESSION_TYPE=tty
215
  ZERO_STAGE=3
216
  _=/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/deepspeed
script.sh CHANGED
@@ -15,35 +15,37 @@
15
  # limitations under the License.
16
  # ==============================================================================
17
 
18
- DATASETS_NAME=("top1-20" "top1-40" "top1-100")
19
 
20
- MODEL_NAME_OR_PATH="/aifs4su/yaodong/hantao/models/llava-v1.6-mistral-7b-hf" # model path
21
 
22
- for DATASET_NAME in "${DATASETS_NAME[@]}"; do
23
- TRAIN_DATASETS="/aifs4su/yaodong/hantao/datasets/MMInstruct-GPT4V_mistral-7b_cosi_cut/merged/${DATASET_NAME}" # dataset path
24
- TRAIN_TEMPLATE="MM_TI2T_LLAVA" # dataset template
 
 
25
  TRAIN_NAME="text-image-to-text" # dataset name
26
  TRAIN_SPLIT="train" # split the dataset
27
 
28
- OUTPUT_DIR="../outputs/LLAVA_7B_cosi/${DATASET_NAME}" # output dir
29
-
30
- # For wandb online logging
31
- export WANDB_API_KEY="7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33"
32
-
33
- # Source the setup script
34
- source ./setup.sh
35
-
36
- # Execute deepspeed command
37
- deepspeed \
38
- --master_port ${MASTER_PORT} \
39
- --module align_anything.trainers.text_image_to_text.sft \
40
- --model_name_or_path ${MODEL_NAME_OR_PATH} \
41
- --train_datasets ${TRAIN_DATASETS} \
42
- --train_template ${TRAIN_TEMPLATE} \
43
- --train_split ${TRAIN_SPLIT} \
44
- --train_name ${TRAIN_NAME} \
45
- --output_dir ${OUTPUT_DIR} \
46
- --save_total_limit 6 \
47
- --train_batch_size 16 \
48
- --epochs 3
49
  done
 
15
  # limitations under the License.
16
  # ==============================================================================
17
 
18
+ DATASETS_NAME=("top1-20" "top1-40")
19
 
20
+ MODEL_NAME_OR_PATH="/aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf" # model path
21
 
22
+ # HOSTFILE="/aifs4su/yaodong/hantao/align-anything/scripts/.hostfile"
23
+
24
+ for DATASET_NAME in ${DATASETS_NAME[@]}; do
25
+ TRAIN_DATASETS="/aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/${DATASET_NAME}" # dataset path
26
+ TRAIN_TEMPLATE="AA_TI2T_LLAVA" # dataset template
27
  TRAIN_NAME="text-image-to-text" # dataset name
28
  TRAIN_SPLIT="train" # split the dataset
29
 
30
+ OUTPUT_DIR="../outputs/llava_1.6_vicuna_7B_cosi/${DATASET_NAME}" # output dir
31
+
32
+ # For wandb online logging
33
+ export WANDB_API_KEY="7e2dcc0c310ebcb7cdcafd5e9320d6be55cf1a33"
34
+
35
+ # Source the setup script
36
+ source ./setup.sh
37
+
38
+ # Execute deepspeed command
39
+ deepspeed \
40
+ --master_port ${MASTER_PORT} \
41
+ --module align_anything.trainers.text_image_to_text.dpo \
42
+ --model_name_or_path ${MODEL_NAME_OR_PATH} \
43
+ --train_datasets ${TRAIN_DATASETS} \
44
+ --train_template ${TRAIN_TEMPLATE} \
45
+ --train_split ${TRAIN_SPLIT} \
46
+ --train_name ${TRAIN_NAME} \
47
+ --output_dir ${OUTPUT_DIR} \
48
+ --save_total_limit 3 \
49
+ --train_batch_size 8 \
50
+ --epochs 3
51
  done
slice_1946/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image>": 32000
3
+ }
slice_1946/chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"
3
+ }
slice_1946/config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "architectures": [
4
+ "LlavaNextForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "ignore_index": -100,
9
+ "image_grid_pinpoints": [
10
+ [
11
+ 336,
12
+ 672
13
+ ],
14
+ [
15
+ 672,
16
+ 336
17
+ ],
18
+ [
19
+ 672,
20
+ 672
21
+ ],
22
+ [
23
+ 1008,
24
+ 336
25
+ ],
26
+ [
27
+ 336,
28
+ 1008
29
+ ]
30
+ ],
31
+ "image_seq_length": 576,
32
+ "image_token_index": 32000,
33
+ "model_type": "llava_next",
34
+ "multimodal_projector_bias": true,
35
+ "pad_token_id": 0,
36
+ "projector_hidden_act": "gelu",
37
+ "text_config": {
38
+ "_name_or_path": "lmsys/vicuna-7b-v1.5",
39
+ "architectures": [
40
+ "LlamaForCausalLM"
41
+ ],
42
+ "attention_bias": false,
43
+ "attention_dropout": 0.0,
44
+ "head_dim": 128,
45
+ "hidden_act": "silu",
46
+ "hidden_size": 4096,
47
+ "initializer_range": 0.02,
48
+ "intermediate_size": 11008,
49
+ "max_position_embeddings": 4096,
50
+ "mlp_bias": false,
51
+ "model_type": "llama",
52
+ "num_attention_heads": 32,
53
+ "num_hidden_layers": 32,
54
+ "num_key_value_heads": 32,
55
+ "pad_token_id": 0,
56
+ "pretraining_tp": 1,
57
+ "rms_norm_eps": 1e-05,
58
+ "rope_scaling": null,
59
+ "rope_theta": 10000.0,
60
+ "torch_dtype": "bfloat16",
61
+ "use_cache": true,
62
+ "vocab_size": 32064
63
+ },
64
+ "tie_word_embeddings": false,
65
+ "torch_dtype": "bfloat16",
66
+ "transformers_version": "4.50.0",
67
+ "use_image_newline_parameter": true,
68
+ "vision_config": {
69
+ "attention_dropout": 0.0,
70
+ "hidden_act": "quick_gelu",
71
+ "hidden_size": 1024,
72
+ "image_size": 336,
73
+ "initializer_factor": 1.0,
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 4096,
76
+ "layer_norm_eps": 1e-05,
77
+ "model_type": "clip_vision_model",
78
+ "num_attention_heads": 16,
79
+ "num_channels": 3,
80
+ "num_hidden_layers": 24,
81
+ "patch_size": 14,
82
+ "projection_dim": 768,
83
+ "torch_dtype": "bfloat16",
84
+ "vocab_size": 32000
85
+ },
86
+ "vision_feature_layer": -2,
87
+ "vision_feature_select_strategy": "default",
88
+ "vocab_size": 32064
89
+ }
slice_1946/preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aspect_ratio_setting": "anyres",
3
+ "crop_size": {
4
+ "height": 336,
5
+ "width": 336
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": true,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_grid_pinpoints": [
14
+ [
15
+ 336,
16
+ 672
17
+ ],
18
+ [
19
+ 672,
20
+ 336
21
+ ],
22
+ [
23
+ 672,
24
+ 672
25
+ ],
26
+ [
27
+ 1008,
28
+ 336
29
+ ],
30
+ [
31
+ 336,
32
+ 1008
33
+ ]
34
+ ],
35
+ "image_mean": [
36
+ 0.48145466,
37
+ 0.4578275,
38
+ 0.40821073
39
+ ],
40
+ "image_processor_type": "LlavaNextImageProcessor",
41
+ "image_std": [
42
+ 0.26862954,
43
+ 0.26130258,
44
+ 0.27577711
45
+ ],
46
+ "processor_class": "LlavaNextProcessor",
47
+ "resample": 3,
48
+ "rescale_factor": 0.00392156862745098,
49
+ "size": {
50
+ "shortest_edge": 336
51
+ }
52
+ }
slice_1946/processor_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_token": "<image>",
3
+ "num_additional_image_tokens": 1,
4
+ "patch_size": 14,
5
+ "processor_class": "LlavaNextProcessor",
6
+ "vision_feature_select_strategy": "default"
7
+ }
slice_1946/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26125e26b5c79b364ed2575b867704cb01b6881c2952992f7796065d357fbb00
3
+ size 14127100866
slice_1946/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "image_token": "<image>",
17
+ "pad_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "unk_token": {
25
+ "content": "<unk>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
slice_1946/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
slice_1946/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
slice_1946/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<image>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ }
38
+ },
39
+ "bos_token": "<s>",
40
+ "clean_up_tokenization_spaces": false,
41
+ "eos_token": "</s>",
42
+ "extra_special_tokens": {
43
+ "image_token": "<image>"
44
+ },
45
+ "image_token": "<image>",
46
+ "legacy": false,
47
+ "model_max_length": 4096,
48
+ "pad_token": "<unk>",
49
+ "padding_side": "left",
50
+ "processor_class": "LlavaNextProcessor",
51
+ "sp_model_kwargs": {},
52
+ "spaces_between_special_tokens": false,
53
+ "tokenizer_class": "LlamaTokenizer",
54
+ "unk_token": "<unk>",
55
+ "use_default_system_prompt": false
56
+ }
slice_2919/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image>": 32000
3
+ }
slice_2919/chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"
3
+ }
slice_2919/config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "architectures": [
4
+ "LlavaNextForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "ignore_index": -100,
9
+ "image_grid_pinpoints": [
10
+ [
11
+ 336,
12
+ 672
13
+ ],
14
+ [
15
+ 672,
16
+ 336
17
+ ],
18
+ [
19
+ 672,
20
+ 672
21
+ ],
22
+ [
23
+ 1008,
24
+ 336
25
+ ],
26
+ [
27
+ 336,
28
+ 1008
29
+ ]
30
+ ],
31
+ "image_seq_length": 576,
32
+ "image_token_index": 32000,
33
+ "model_type": "llava_next",
34
+ "multimodal_projector_bias": true,
35
+ "pad_token_id": 0,
36
+ "projector_hidden_act": "gelu",
37
+ "text_config": {
38
+ "_name_or_path": "lmsys/vicuna-7b-v1.5",
39
+ "architectures": [
40
+ "LlamaForCausalLM"
41
+ ],
42
+ "attention_bias": false,
43
+ "attention_dropout": 0.0,
44
+ "head_dim": 128,
45
+ "hidden_act": "silu",
46
+ "hidden_size": 4096,
47
+ "initializer_range": 0.02,
48
+ "intermediate_size": 11008,
49
+ "max_position_embeddings": 4096,
50
+ "mlp_bias": false,
51
+ "model_type": "llama",
52
+ "num_attention_heads": 32,
53
+ "num_hidden_layers": 32,
54
+ "num_key_value_heads": 32,
55
+ "pad_token_id": 0,
56
+ "pretraining_tp": 1,
57
+ "rms_norm_eps": 1e-05,
58
+ "rope_scaling": null,
59
+ "rope_theta": 10000.0,
60
+ "torch_dtype": "bfloat16",
61
+ "use_cache": true,
62
+ "vocab_size": 32064
63
+ },
64
+ "tie_word_embeddings": false,
65
+ "torch_dtype": "bfloat16",
66
+ "transformers_version": "4.50.0",
67
+ "use_image_newline_parameter": true,
68
+ "vision_config": {
69
+ "attention_dropout": 0.0,
70
+ "hidden_act": "quick_gelu",
71
+ "hidden_size": 1024,
72
+ "image_size": 336,
73
+ "initializer_factor": 1.0,
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 4096,
76
+ "layer_norm_eps": 1e-05,
77
+ "model_type": "clip_vision_model",
78
+ "num_attention_heads": 16,
79
+ "num_channels": 3,
80
+ "num_hidden_layers": 24,
81
+ "patch_size": 14,
82
+ "projection_dim": 768,
83
+ "torch_dtype": "bfloat16",
84
+ "vocab_size": 32000
85
+ },
86
+ "vision_feature_layer": -2,
87
+ "vision_feature_select_strategy": "default",
88
+ "vocab_size": 32064
89
+ }
slice_2919/preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aspect_ratio_setting": "anyres",
3
+ "crop_size": {
4
+ "height": 336,
5
+ "width": 336
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": true,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_grid_pinpoints": [
14
+ [
15
+ 336,
16
+ 672
17
+ ],
18
+ [
19
+ 672,
20
+ 336
21
+ ],
22
+ [
23
+ 672,
24
+ 672
25
+ ],
26
+ [
27
+ 1008,
28
+ 336
29
+ ],
30
+ [
31
+ 336,
32
+ 1008
33
+ ]
34
+ ],
35
+ "image_mean": [
36
+ 0.48145466,
37
+ 0.4578275,
38
+ 0.40821073
39
+ ],
40
+ "image_processor_type": "LlavaNextImageProcessor",
41
+ "image_std": [
42
+ 0.26862954,
43
+ 0.26130258,
44
+ 0.27577711
45
+ ],
46
+ "processor_class": "LlavaNextProcessor",
47
+ "resample": 3,
48
+ "rescale_factor": 0.00392156862745098,
49
+ "size": {
50
+ "shortest_edge": 336
51
+ }
52
+ }
slice_2919/processor_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_token": "<image>",
3
+ "num_additional_image_tokens": 1,
4
+ "patch_size": 14,
5
+ "processor_class": "LlavaNextProcessor",
6
+ "vision_feature_select_strategy": "default"
7
+ }
slice_2919/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f2fe0e44c0cb29572163848f7e15956178c685c2ceeb9930e9e5b34a45fd941
3
+ size 14127100866
slice_2919/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "image_token": "<image>",
17
+ "pad_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "unk_token": {
25
+ "content": "<unk>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
slice_2919/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
slice_2919/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
slice_2919/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<image>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ }
38
+ },
39
+ "bos_token": "<s>",
40
+ "clean_up_tokenization_spaces": false,
41
+ "eos_token": "</s>",
42
+ "extra_special_tokens": {
43
+ "image_token": "<image>"
44
+ },
45
+ "image_token": "<image>",
46
+ "legacy": false,
47
+ "model_max_length": 4096,
48
+ "pad_token": "<unk>",
49
+ "padding_side": "left",
50
+ "processor_class": "LlavaNextProcessor",
51
+ "sp_model_kwargs": {},
52
+ "spaces_between_special_tokens": false,
53
+ "tokenizer_class": "LlamaTokenizer",
54
+ "unk_token": "<unk>",
55
+ "use_default_system_prompt": false
56
+ }
slice_973/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image>": 32000
3
+ }
slice_973/chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"
3
+ }
slice_973/config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "architectures": [
4
+ "LlavaNextForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "ignore_index": -100,
9
+ "image_grid_pinpoints": [
10
+ [
11
+ 336,
12
+ 672
13
+ ],
14
+ [
15
+ 672,
16
+ 336
17
+ ],
18
+ [
19
+ 672,
20
+ 672
21
+ ],
22
+ [
23
+ 1008,
24
+ 336
25
+ ],
26
+ [
27
+ 336,
28
+ 1008
29
+ ]
30
+ ],
31
+ "image_seq_length": 576,
32
+ "image_token_index": 32000,
33
+ "model_type": "llava_next",
34
+ "multimodal_projector_bias": true,
35
+ "pad_token_id": 0,
36
+ "projector_hidden_act": "gelu",
37
+ "text_config": {
38
+ "_name_or_path": "lmsys/vicuna-7b-v1.5",
39
+ "architectures": [
40
+ "LlamaForCausalLM"
41
+ ],
42
+ "attention_bias": false,
43
+ "attention_dropout": 0.0,
44
+ "head_dim": 128,
45
+ "hidden_act": "silu",
46
+ "hidden_size": 4096,
47
+ "initializer_range": 0.02,
48
+ "intermediate_size": 11008,
49
+ "max_position_embeddings": 4096,
50
+ "mlp_bias": false,
51
+ "model_type": "llama",
52
+ "num_attention_heads": 32,
53
+ "num_hidden_layers": 32,
54
+ "num_key_value_heads": 32,
55
+ "pad_token_id": 0,
56
+ "pretraining_tp": 1,
57
+ "rms_norm_eps": 1e-05,
58
+ "rope_scaling": null,
59
+ "rope_theta": 10000.0,
60
+ "torch_dtype": "bfloat16",
61
+ "use_cache": true,
62
+ "vocab_size": 32064
63
+ },
64
+ "tie_word_embeddings": false,
65
+ "torch_dtype": "bfloat16",
66
+ "transformers_version": "4.50.0",
67
+ "use_image_newline_parameter": true,
68
+ "vision_config": {
69
+ "attention_dropout": 0.0,
70
+ "hidden_act": "quick_gelu",
71
+ "hidden_size": 1024,
72
+ "image_size": 336,
73
+ "initializer_factor": 1.0,
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 4096,
76
+ "layer_norm_eps": 1e-05,
77
+ "model_type": "clip_vision_model",
78
+ "num_attention_heads": 16,
79
+ "num_channels": 3,
80
+ "num_hidden_layers": 24,
81
+ "patch_size": 14,
82
+ "projection_dim": 768,
83
+ "torch_dtype": "bfloat16",
84
+ "vocab_size": 32000
85
+ },
86
+ "vision_feature_layer": -2,
87
+ "vision_feature_select_strategy": "default",
88
+ "vocab_size": 32064
89
+ }
slice_973/preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aspect_ratio_setting": "anyres",
3
+ "crop_size": {
4
+ "height": 336,
5
+ "width": 336
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": true,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_grid_pinpoints": [
14
+ [
15
+ 336,
16
+ 672
17
+ ],
18
+ [
19
+ 672,
20
+ 336
21
+ ],
22
+ [
23
+ 672,
24
+ 672
25
+ ],
26
+ [
27
+ 1008,
28
+ 336
29
+ ],
30
+ [
31
+ 336,
32
+ 1008
33
+ ]
34
+ ],
35
+ "image_mean": [
36
+ 0.48145466,
37
+ 0.4578275,
38
+ 0.40821073
39
+ ],
40
+ "image_processor_type": "LlavaNextImageProcessor",
41
+ "image_std": [
42
+ 0.26862954,
43
+ 0.26130258,
44
+ 0.27577711
45
+ ],
46
+ "processor_class": "LlavaNextProcessor",
47
+ "resample": 3,
48
+ "rescale_factor": 0.00392156862745098,
49
+ "size": {
50
+ "shortest_edge": 336
51
+ }
52
+ }
slice_973/processor_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_token": "<image>",
3
+ "num_additional_image_tokens": 1,
4
+ "patch_size": 14,
5
+ "processor_class": "LlavaNextProcessor",
6
+ "vision_feature_select_strategy": "default"
7
+ }
slice_973/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53c94f98c6773a4bd6036fd0a45deace471472bd745b2d9c4981b365fcbb5ef5
3
+ size 14127100866
slice_973/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "image_token": "<image>",
17
+ "pad_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "unk_token": {
25
+ "content": "<unk>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
slice_973/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
slice_973/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
slice_973/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<image>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ }
38
+ },
39
+ "bos_token": "<s>",
40
+ "clean_up_tokenization_spaces": false,
41
+ "eos_token": "</s>",
42
+ "extra_special_tokens": {
43
+ "image_token": "<image>"
44
+ },
45
+ "image_token": "<image>",
46
+ "legacy": false,
47
+ "model_max_length": 4096,
48
+ "pad_token": "<unk>",
49
+ "padding_side": "left",
50
+ "processor_class": "LlavaNextProcessor",
51
+ "sp_model_kwargs": {},
52
+ "spaces_between_special_tokens": false,
53
+ "tokenizer_class": "LlamaTokenizer",
54
+ "unk_token": "<unk>",
55
+ "use_default_system_prompt": false
56
+ }
slice_end/added_tokens.json CHANGED
@@ -1,4 +1,3 @@
1
  {
2
- "<image>": 32000,
3
- "<pad>": 32001
4
  }
 
1
  {
2
+ "<image>": 32000
 
3
  }
slice_end/config.json CHANGED
@@ -32,27 +32,31 @@
32
  "image_token_index": 32000,
33
  "model_type": "llava_next",
34
  "multimodal_projector_bias": true,
35
- "pad_token_id": 32001,
36
  "projector_hidden_act": "gelu",
37
  "text_config": {
38
- "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
39
  "architectures": [
40
- "MistralForCausalLM"
41
  ],
 
42
  "attention_dropout": 0.0,
43
  "head_dim": 128,
44
  "hidden_act": "silu",
45
  "hidden_size": 4096,
46
  "initializer_range": 0.02,
47
- "intermediate_size": 14336,
48
- "max_position_embeddings": 32768,
49
- "model_type": "mistral",
 
50
  "num_attention_heads": 32,
51
  "num_hidden_layers": 32,
52
- "num_key_value_heads": 8,
 
 
53
  "rms_norm_eps": 1e-05,
54
- "rope_theta": 1000000.0,
55
- "sliding_window": null,
56
  "torch_dtype": "bfloat16",
57
  "use_cache": true,
58
  "vocab_size": 32064
 
32
  "image_token_index": 32000,
33
  "model_type": "llava_next",
34
  "multimodal_projector_bias": true,
35
+ "pad_token_id": 0,
36
  "projector_hidden_act": "gelu",
37
  "text_config": {
38
+ "_name_or_path": "lmsys/vicuna-7b-v1.5",
39
  "architectures": [
40
+ "LlamaForCausalLM"
41
  ],
42
+ "attention_bias": false,
43
  "attention_dropout": 0.0,
44
  "head_dim": 128,
45
  "hidden_act": "silu",
46
  "hidden_size": 4096,
47
  "initializer_range": 0.02,
48
+ "intermediate_size": 11008,
49
+ "max_position_embeddings": 4096,
50
+ "mlp_bias": false,
51
+ "model_type": "llama",
52
  "num_attention_heads": 32,
53
  "num_hidden_layers": 32,
54
+ "num_key_value_heads": 32,
55
+ "pad_token_id": 0,
56
+ "pretraining_tp": 1,
57
  "rms_norm_eps": 1e-05,
58
+ "rope_scaling": null,
59
+ "rope_theta": 10000.0,
60
  "torch_dtype": "bfloat16",
61
  "use_cache": true,
62
  "vocab_size": 32064
slice_end/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfb0a6769ac3b4acd0c5e0107a5d5fc7d31c94e81e0df91904edf89d26130aeb
3
- size 15133733934
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f2fe0e44c0cb29572163848f7e15956178c685c2ceeb9930e9e5b34a45fd941
3
+ size 14127100866
slice_end/special_tokens_map.json CHANGED
@@ -15,7 +15,7 @@
15
  },
16
  "image_token": "<image>",
17
  "pad_token": {
18
- "content": "<pad>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
 
15
  },
16
  "image_token": "<image>",
17
  "pad_token": {
18
+ "content": "<unk>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
slice_end/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
slice_end/tokenizer.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
- size 493443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
slice_end/tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
- "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
@@ -34,32 +34,19 @@
34
  "rstrip": false,
35
  "single_word": false,
36
  "special": true
37
- },
38
- "32001": {
39
- "content": "<pad>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false,
44
- "special": true
45
  }
46
  },
47
- "additional_special_tokens": [],
48
  "bos_token": "<s>",
49
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
50
  "clean_up_tokenization_spaces": false,
51
  "eos_token": "</s>",
52
  "extra_special_tokens": {
53
  "image_token": "<image>"
54
  },
55
  "image_token": "<image>",
56
- "legacy": true,
57
- "max_length": null,
58
- "model_max_length": 2048,
59
- "pad_to_multiple_of": null,
60
- "pad_token": "<pad>",
61
- "pad_token_type_id": 0,
62
- "padding_side": "right",
63
  "processor_class": "LlavaNextProcessor",
64
  "sp_model_kwargs": {},
65
  "spaces_between_special_tokens": false,
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+ "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
34
  "rstrip": false,
35
  "single_word": false,
36
  "special": true
 
 
 
 
 
 
 
 
37
  }
38
  },
 
39
  "bos_token": "<s>",
 
40
  "clean_up_tokenization_spaces": false,
41
  "eos_token": "</s>",
42
  "extra_special_tokens": {
43
  "image_token": "<image>"
44
  },
45
  "image_token": "<image>",
46
+ "legacy": false,
47
+ "model_max_length": 4096,
48
+ "pad_token": "<unk>",
49
+ "padding_side": "left",
 
 
 
50
  "processor_class": "LlavaNextProcessor",
51
  "sp_model_kwargs": {},
52
  "spaces_between_special_tokens": false,
wandb/debug-internal.log CHANGED
@@ -1,18 +1,16 @@
1
- {"time":"2025-03-29T01:34:36.733988968+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"../outputs/LLAVA_7B_cosi/top1-20/wandb/run-20250329_013436-woy6qs03/logs/debug-core.log"}
2
- {"time":"2025-03-29T01:34:36.949126158+08:00","level":"INFO","msg":"created new stream","id":"woy6qs03"}
3
- {"time":"2025-03-29T01:34:36.949186856+08:00","level":"INFO","msg":"stream: started","id":"woy6qs03"}
4
- {"time":"2025-03-29T01:34:36.949207563+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"woy6qs03"}
5
- {"time":"2025-03-29T01:34:36.949215725+08:00","level":"INFO","msg":"sender: started","stream_id":"woy6qs03"}
6
- {"time":"2025-03-29T01:34:36.949220743+08:00","level":"INFO","msg":"handler: started","stream_id":"woy6qs03"}
7
- {"time":"2025-03-29T01:34:37.259100412+08:00","level":"INFO","msg":"Starting system monitor"}
8
- {"time":"2025-03-29T02:26:44.055561238+08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
9
- {"time":"2025-03-29T03:24:16.612632055+08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/htlou/align-anything/woy6qs03/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
10
- {"time":"2025-03-29T05:50:20.145178642+08:00","level":"INFO","msg":"Stopping system monitor"}
11
- {"time":"2025-03-29T05:50:20.151392274+08:00","level":"INFO","msg":"Stopped system monitor"}
12
- {"time":"2025-03-29T05:50:21.496932207+08:00","level":"INFO","msg":"stream: closing","id":"woy6qs03"}
13
- {"time":"2025-03-29T05:50:21.496985495+08:00","level":"WARN","msg":"sender: received Exit record more than once, ignoring"}
14
- {"time":"2025-03-29T05:50:21.724183035+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
15
- {"time":"2025-03-29T05:50:21.963289813+08:00","level":"INFO","msg":"handler: closed","stream_id":"woy6qs03"}
16
- {"time":"2025-03-29T05:50:21.963348455+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"woy6qs03"}
17
- {"time":"2025-03-29T05:50:21.963502619+08:00","level":"INFO","msg":"sender: closed","stream_id":"woy6qs03"}
18
- {"time":"2025-03-29T05:50:21.963744004+08:00","level":"INFO","msg":"stream: closed","id":"woy6qs03"}
 
1
+ {"time":"2025-04-01T09:28:24.081265347+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"../outputs/llava_1.6_vicuna_7B_cosi/top1-20/wandb/run-20250401_092824-bd2o51v8/logs/debug-core.log"}
2
+ {"time":"2025-04-01T09:28:24.302680251+08:00","level":"INFO","msg":"created new stream","id":"bd2o51v8"}
3
+ {"time":"2025-04-01T09:28:24.302754002+08:00","level":"INFO","msg":"stream: started","id":"bd2o51v8"}
4
+ {"time":"2025-04-01T09:28:24.302817732+08:00","level":"INFO","msg":"handler: started","stream_id":"bd2o51v8"}
5
+ {"time":"2025-04-01T09:28:24.302840797+08:00","level":"INFO","msg":"sender: started","stream_id":"bd2o51v8"}
6
+ {"time":"2025-04-01T09:28:24.302883549+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"bd2o51v8"}
7
+ {"time":"2025-04-01T09:28:24.717791905+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-04-01T11:33:47.258061087+08:00","level":"INFO","msg":"Stopping system monitor"}
9
+ {"time":"2025-04-01T11:33:47.265278587+08:00","level":"INFO","msg":"Stopped system monitor"}
10
+ {"time":"2025-04-01T11:33:48.674037127+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
11
+ {"time":"2025-04-01T11:33:48.72438635+08:00","level":"INFO","msg":"stream: closing","id":"bd2o51v8"}
12
+ {"time":"2025-04-01T11:33:48.724423251+08:00","level":"WARN","msg":"sender: received Exit record more than once, ignoring"}
13
+ {"time":"2025-04-01T11:33:48.894912192+08:00","level":"INFO","msg":"handler: closed","stream_id":"bd2o51v8"}
14
+ {"time":"2025-04-01T11:33:48.89495363+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"bd2o51v8"}
15
+ {"time":"2025-04-01T11:33:48.895096718+08:00","level":"INFO","msg":"sender: closed","stream_id":"bd2o51v8"}
16
+ {"time":"2025-04-01T11:33:48.895192408+08:00","level":"INFO","msg":"stream: closed","id":"bd2o51v8"}
 
 
wandb/debug.log CHANGED
@@ -1,32 +1,32 @@
1
- 2025-03-29 01:34:36,725 INFO MainThread:125374 [wandb_setup.py:_flush():67] Current SDK version is 0.19.8
2
- 2025-03-29 01:34:36,726 INFO MainThread:125374 [wandb_setup.py:_flush():67] Configure stats pid to 125374
3
- 2025-03-29 01:34:36,726 INFO MainThread:125374 [wandb_setup.py:_flush():67] Loading settings from /home/yangyaodong/.config/wandb/settings
4
- 2025-03-29 01:34:36,726 INFO MainThread:125374 [wandb_setup.py:_flush():67] Loading settings from /aifs4su/yaodong/hantao/align-anything/scripts/wandb/settings
5
- 2025-03-29 01:34:36,726 INFO MainThread:125374 [wandb_setup.py:_flush():67] Loading settings from environment variables
6
- 2025-03-29 01:34:36,726 INFO MainThread:125374 [wandb_init.py:setup_run_log_directory():647] Logging user logs to ../outputs/LLAVA_7B_cosi/top1-20/wandb/run-20250329_013436-woy6qs03/logs/debug.log
7
- 2025-03-29 01:34:36,726 INFO MainThread:125374 [wandb_init.py:setup_run_log_directory():648] Logging internal logs to ../outputs/LLAVA_7B_cosi/top1-20/wandb/run-20250329_013436-woy6qs03/logs/debug-internal.log
8
- 2025-03-29 01:34:36,726 INFO MainThread:125374 [wandb_init.py:init():761] calling init triggers
9
- 2025-03-29 01:34:36,726 INFO MainThread:125374 [wandb_init.py:init():766] wandb.init called with sweep_config: {}
10
- config: {'train_cfgs': {'save_checkpoint': False, 'load_checkpoint': False, 'ds_cfgs': 'ds_z3_config.json', 'epochs': 3, 'seed': 42, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 16, 'gradient_checkpointing': True, 'learning_rate': 2e-05, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.0, 'adam_betas': [0.9, 0.95], 'adam_epsilon': 1e-08, 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'freeze_mm_proj': False, 'freeze_vision_tower': True, 'freeze_language_model': False, 'max_grad_norm': 1.0}, 'data_cfgs': {'load_multi_datasets': False, 'train_datasets': '/aifs4su/yaodong/hantao/datasets/MMInstruct-GPT4V_mistral-7b_cosi_cut/merged/top1-20', 'train_template': 'MM_TI2T_LLAVA', 'train_size': {}, 'train_split': 'train', 'train_name': 'text-image-to-text', 'train_data_files': {}, 'train_optional_args': [], 'eval_datasets': {}, 'eval_template': {}, 'eval_name': {}, 'eval_size': {}, 'eval_split': {}, 'eval_subset': {}, 'eval_data_files': {}, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'sft', 'output_dir': '../outputs/LLAVA_7B_cosi/top1-20', 'cache_dir': {}, 'save_total_limit': 6}, 'model_cfgs': {'model_name_or_path': '/aifs4su/yaodong/hantao/models/llava-v1.6-mistral-7b-hf', 'trust_remote_code': True, 'model_max_length': 2048}, 'special_tokens': {}, '_wandb': {}}
11
- 2025-03-29 01:34:36,726 INFO MainThread:125374 [wandb_init.py:init():784] starting backend
12
- 2025-03-29 01:34:36,726 INFO MainThread:125374 [wandb_init.py:init():788] sending inform_init request
13
- 2025-03-29 01:34:36,730 INFO MainThread:125374 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
- 2025-03-29 01:34:36,730 INFO MainThread:125374 [wandb_init.py:init():798] backend started and connected
15
- 2025-03-29 01:34:36,731 INFO MainThread:125374 [wandb_init.py:init():891] updated telemetry
16
- 2025-03-29 01:34:36,742 INFO MainThread:125374 [wandb_init.py:init():915] communicating run to backend with 90.0 second timeout
17
- 2025-03-29 01:34:37,255 INFO MainThread:125374 [wandb_init.py:init():990] starting run threads in backend
18
- 2025-03-29 01:34:37,418 INFO MainThread:125374 [wandb_run.py:_console_start():2375] atexit reg
19
- 2025-03-29 01:34:37,419 INFO MainThread:125374 [wandb_run.py:_redirect():2227] redirect: wrap_raw
20
- 2025-03-29 01:34:37,419 INFO MainThread:125374 [wandb_run.py:_redirect():2292] Wrapping output streams.
21
- 2025-03-29 01:34:37,419 INFO MainThread:125374 [wandb_run.py:_redirect():2315] Redirects installed.
22
- 2025-03-29 01:34:37,421 INFO MainThread:125374 [wandb_init.py:init():1032] run started, returning control to user process
23
- 2025-03-29 05:50:20,142 INFO MainThread:125374 [wandb_run.py:_finish():2112] finishing run htlou/align-anything/woy6qs03
24
- 2025-03-29 05:50:20,143 INFO MainThread:125374 [wandb_run.py:_atexit_cleanup():2340] got exitcode: 0
25
- 2025-03-29 05:50:20,144 INFO MainThread:125374 [wandb_run.py:_restore():2322] restore
26
- 2025-03-29 05:50:20,144 INFO MainThread:125374 [wandb_run.py:_restore():2328] restore done
27
- 2025-03-29 05:50:21,145 INFO MainThread:125374 [wandb_run.py:_restore():2322] restore
28
- 2025-03-29 05:50:21,145 INFO MainThread:125374 [wandb_run.py:_restore():2328] restore done
29
- 2025-03-29 05:50:21,145 ERROR MainThread:125374 [wandb_run.py:_atexit_cleanup():2361] Problem finishing run
30
  Traceback (most recent call last):
31
  File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2352, in _atexit_cleanup
32
  self._on_finish()
@@ -44,4 +44,4 @@ Traceback (most recent call last):
44
  File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/concurrent/futures/thread.py", line 169, in submit
45
  raise RuntimeError('cannot schedule new futures after '
46
  RuntimeError: cannot schedule new futures after interpreter shutdown
47
- 2025-03-29 05:50:21,496 INFO MsgRouterThr:125374 [mailbox.py:close():129] Closing mailbox, abandoning 2 handles.
 
1
+ 2025-04-01 09:28:24,067 INFO MainThread:516629 [wandb_setup.py:_flush():67] Current SDK version is 0.19.8
2
+ 2025-04-01 09:28:24,068 INFO MainThread:516629 [wandb_setup.py:_flush():67] Configure stats pid to 516629
3
+ 2025-04-01 09:28:24,068 INFO MainThread:516629 [wandb_setup.py:_flush():67] Loading settings from /home/yangyaodong/.config/wandb/settings
4
+ 2025-04-01 09:28:24,068 INFO MainThread:516629 [wandb_setup.py:_flush():67] Loading settings from /aifs4su/yaodong/hantao/align-anything/scripts/wandb/settings
5
+ 2025-04-01 09:28:24,068 INFO MainThread:516629 [wandb_setup.py:_flush():67] Loading settings from environment variables
6
+ 2025-04-01 09:28:24,068 INFO MainThread:516629 [wandb_init.py:setup_run_log_directory():647] Logging user logs to ../outputs/llava_1.6_vicuna_7B_cosi/top1-20/wandb/run-20250401_092824-bd2o51v8/logs/debug.log
7
+ 2025-04-01 09:28:24,069 INFO MainThread:516629 [wandb_init.py:setup_run_log_directory():648] Logging internal logs to ../outputs/llava_1.6_vicuna_7B_cosi/top1-20/wandb/run-20250401_092824-bd2o51v8/logs/debug-internal.log
8
+ 2025-04-01 09:28:24,069 INFO MainThread:516629 [wandb_init.py:init():761] calling init triggers
9
+ 2025-04-01 09:28:24,069 INFO MainThread:516629 [wandb_init.py:init():766] wandb.init called with sweep_config: {}
10
+ config: {'train_cfgs': {'save_checkpoint': False, 'load_checkpoint': False, 'ds_cfgs': 'ds_z3_config.json', 'epochs': 3, 'seed': 42, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 1, 'gradient_checkpointing': True, 'learning_rate': 1e-06, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.0, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': False, 'freeze_vision_tower': True, 'freeze_language_model': False}, 'data_cfgs': {'train_datasets': '/aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-20', 'train_template': 'AA_TI2T_LLAVA', 'train_size': {}, 'train_split': 'train', 'train_name': 'text-image-to-text', 'train_data_files': {}, 'train_optional_args': [], 'eval_datasets': {}, 'eval_template': {}, 'eval_size': {}, 'eval_split': {}, 'eval_subset': {}, 'eval_data_files': {}, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '../outputs/llava_1.6_vicuna_7B_cosi/top1-20', 'cache_dir': {}, 'save_total_limit': 3}, 'model_cfgs': {'model_name_or_path': '/aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': {}, '_wandb': {}}
11
+ 2025-04-01 09:28:24,069 INFO MainThread:516629 [wandb_init.py:init():784] starting backend
12
+ 2025-04-01 09:28:24,069 INFO MainThread:516629 [wandb_init.py:init():788] sending inform_init request
13
+ 2025-04-01 09:28:24,077 INFO MainThread:516629 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-04-01 09:28:24,077 INFO MainThread:516629 [wandb_init.py:init():798] backend started and connected
15
+ 2025-04-01 09:28:24,079 INFO MainThread:516629 [wandb_init.py:init():891] updated telemetry
16
+ 2025-04-01 09:28:24,101 INFO MainThread:516629 [wandb_init.py:init():915] communicating run to backend with 90.0 second timeout
17
+ 2025-04-01 09:28:24,710 INFO MainThread:516629 [wandb_init.py:init():990] starting run threads in backend
18
+ 2025-04-01 09:28:25,153 INFO MainThread:516629 [wandb_run.py:_console_start():2375] atexit reg
19
+ 2025-04-01 09:28:25,153 INFO MainThread:516629 [wandb_run.py:_redirect():2227] redirect: wrap_raw
20
+ 2025-04-01 09:28:25,153 INFO MainThread:516629 [wandb_run.py:_redirect():2292] Wrapping output streams.
21
+ 2025-04-01 09:28:25,153 INFO MainThread:516629 [wandb_run.py:_redirect():2315] Redirects installed.
22
+ 2025-04-01 09:28:25,159 INFO MainThread:516629 [wandb_init.py:init():1032] run started, returning control to user process
23
+ 2025-04-01 11:33:47,217 INFO MainThread:516629 [wandb_run.py:_finish():2112] finishing run htlou/align-anything/bd2o51v8
24
+ 2025-04-01 11:33:47,219 INFO MainThread:516629 [wandb_run.py:_atexit_cleanup():2340] got exitcode: 0
25
+ 2025-04-01 11:33:47,220 INFO MainThread:516629 [wandb_run.py:_restore():2322] restore
26
+ 2025-04-01 11:33:47,220 INFO MainThread:516629 [wandb_run.py:_restore():2328] restore done
27
+ 2025-04-01 11:33:48,221 INFO MainThread:516629 [wandb_run.py:_restore():2322] restore
28
+ 2025-04-01 11:33:48,221 INFO MainThread:516629 [wandb_run.py:_restore():2328] restore done
29
+ 2025-04-01 11:33:48,221 ERROR MainThread:516629 [wandb_run.py:_atexit_cleanup():2361] Problem finishing run
30
  Traceback (most recent call last):
31
  File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2352, in _atexit_cleanup
32
  self._on_finish()
 
44
  File "/aifs4su/yaodong/miniconda3/envs/hantao_llama/lib/python3.11/concurrent/futures/thread.py", line 169, in submit
45
  raise RuntimeError('cannot schedule new futures after '
46
  RuntimeError: cannot schedule new futures after interpreter shutdown
47
+ 2025-04-01 11:33:48,723 INFO MsgRouterThr:516629 [mailbox.py:close():129] Closing mailbox, abandoning 2 handles.
wandb/run-20250401_092824-bd2o51v8/files/config.yaml ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.19.8
4
+ m: []
5
+ python_version: 3.11.11
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 5
10
+ - 11
11
+ - 41
12
+ - 49
13
+ - 51
14
+ - 53
15
+ - 55
16
+ - 63
17
+ - 71
18
+ - 83
19
+ - 98
20
+ - 105
21
+ "2":
22
+ - 1
23
+ - 5
24
+ - 11
25
+ - 41
26
+ - 49
27
+ - 51
28
+ - 53
29
+ - 55
30
+ - 63
31
+ - 71
32
+ - 83
33
+ - 98
34
+ - 105
35
+ "3":
36
+ - 2
37
+ - 13
38
+ - 16
39
+ - 23
40
+ - 55
41
+ - 61
42
+ "4": 3.11.11
43
+ "5": 0.19.8
44
+ "6": 4.50.0
45
+ "8":
46
+ - 5
47
+ "12": 0.19.8
48
+ "13": linux-x86_64
49
+ data_cfgs:
50
+ value:
51
+ eval_optional_args: []
52
+ train_datasets: /aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-20
53
+ train_name: text-image-to-text
54
+ train_optional_args: []
55
+ train_split: train
56
+ train_template: AA_TI2T_LLAVA
57
+ logger_cfgs:
58
+ value:
59
+ log_project: align-anything
60
+ log_run_name: dpo
61
+ log_type: wandb
62
+ output_dir: ../outputs/llava_1.6_vicuna_7B_cosi/top1-20
63
+ save_total_limit: 3
64
+ model_cfgs:
65
+ value:
66
+ model_max_length: 4096
67
+ model_name_or_path: /aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf
68
+ trust_remote_code: true
69
+ train_cfgs:
70
+ value:
71
+ adam_betas:
72
+ - 0.9
73
+ - 0.95
74
+ bf16: true
75
+ ds_cfgs: ds_z3_config.json
76
+ epochs: 3
77
+ eval_interval: 10
78
+ eval_strategy: epoch
79
+ fp16: false
80
+ freeze_language_model: false
81
+ freeze_mm_proj: false
82
+ freeze_vision_tower: true
83
+ gradient_accumulation_steps: 1
84
+ gradient_checkpointing: true
85
+ learning_rate: 1e-06
86
+ load_checkpoint: false
87
+ lr_scheduler_type: cosine
88
+ lr_warmup_ratio: 0.03
89
+ per_device_eval_batch_size: 1
90
+ per_device_train_batch_size: 1
91
+ regularization: 0.001
92
+ save_checkpoint: false
93
+ scale_coeff: 0.1
94
+ seed: 42
95
+ weight_decay: 0
wandb/run-20250401_092824-bd2o51v8/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20250401_092824-bd2o51v8/files/requirements.txt ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ maskrcnn_benchmark==0.0.0
2
+ webdataset==0.2.111
3
+ websockets==15.0.1
4
+ typer==0.15.2
5
+ blobfile==3.0.0
6
+ pooch==1.8.2
7
+ filelock==3.18.0
8
+ referencing==0.36.2
9
+ matplotlib==3.10.1
10
+ cachetools==5.5.2
11
+ python-dateutil==2.9.0.post0
12
+ gmpy2==2.2.1
13
+ uvloop==0.21.0
14
+ nvidia-cusparselt-cu12==0.6.2
15
+ clip==0.2.0
16
+ httpcore==1.0.7
17
+ charset-normalizer==3.3.2
18
+ torchlibrosa==0.1.0
19
+ contourpy==1.3.1
20
+ multiprocess==0.70.16
21
+ nest-asyncio==1.6.0
22
+ Werkzeug==3.1.3
23
+ aiofiles==23.2.1
24
+ six==1.17.0
25
+ torch==2.6.0
26
+ sse-starlette==2.2.1
27
+ typing_extensions==4.12.2
28
+ xgrammar==0.1.16
29
+ psutil==7.0.0
30
+ kiwisolver==1.4.8
31
+ moviepy==2.1.2
32
+ frozenlist==1.5.0
33
+ jiter==0.9.0
34
+ einops==0.8.1
35
+ flash_attn==2.7.4.post1
36
+ PySocks==1.7.1
37
+ regex==2024.11.6
38
+ markdown-it-py==3.0.0
39
+ ruff==0.11.2
40
+ docker-pycreds==0.4.0
41
+ nvidia-nvtx-cu12==12.4.127
42
+ pyparsing==3.2.3
43
+ resampy==0.4.3
44
+ tokenizers==0.21.0
45
+ frechet-audio-distance==0.1.2
46
+ aiohappyeyeballs==2.6.1
47
+ llamafactory==0.9.3.dev0
48
+ msgspec==0.19.0
49
+ httpx==0.28.1
50
+ encodec==0.1.1
51
+ ffmpy==0.5.0
52
+ jsonschema==4.23.0
53
+ imageio-ffmpeg==0.6.0
54
+ mkl_random==1.2.8
55
+ fairscale==0.4.13
56
+ soxr==0.5.0.post1
57
+ lark==1.2.2
58
+ gradio==5.21.0
59
+ absl-py==2.2.1
60
+ dnspython==2.7.0
61
+ networkx==3.4.2
62
+ h5py==3.13.0
63
+ hjson==3.1.0
64
+ tensorboard==2.19.0
65
+ aiosignal==1.3.2
66
+ pip==25.0
67
+ nvidia-cublas-cu12==12.4.5.8
68
+ llguidance==0.7.11
69
+ zipp==3.21.0
70
+ ftfy==6.3.1
71
+ peft==0.15.0
72
+ attrs==25.3.0
73
+ trl==0.9.6
74
+ requests==2.32.3
75
+ progressbar==2.5
76
+ sniffio==1.3.1
77
+ pycountry==24.6.1
78
+ lxml==5.3.1
79
+ starlette==0.46.1
80
+ pytest==7.2.0
81
+ Markdown==3.7
82
+ mdurl==0.1.2
83
+ pyzmq==26.3.0
84
+ safetensors==0.5.3
85
+ opencv-python==4.6.0.66
86
+ prometheus-fastapi-instrumentator==7.1.0
87
+ shellingham==1.5.4
88
+ torchvision==0.21.0
89
+ pluggy==1.5.0
90
+ timm==1.0.15
91
+ multidict==6.2.0
92
+ semantic-version==2.10.0
93
+ airportsdata==20250224
94
+ numba==0.60.0
95
+ MarkupSafe==2.1.5
96
+ pydantic_core==2.33.0
97
+ imageio==2.37.0
98
+ nvidia-nccl-cu12==2.21.5
99
+ dill==0.3.8
100
+ msgpack==1.1.0
101
+ sentry-sdk==2.24.1
102
+ rpds-py==0.24.0
103
+ grpcio==1.71.0
104
+ fastrlock==0.8.3
105
+ python-json-logger==3.3.0
106
+ cffi==1.17.1
107
+ gradio_client==1.7.2
108
+ PyYAML==6.0.2
109
+ tensorboard-data-server==0.7.2
110
+ termcolor==2.5.0
111
+ torchaudio==2.6.0
112
+ triton==3.2.0
113
+ fastapi==0.115.12
114
+ clint==0.5.1
115
+ lazy_loader==0.4
116
+ depyf==0.18.0
117
+ mkl_fft==1.3.11
118
+ annotated-types==0.7.0
119
+ scikit-learn==1.6.1
120
+ wget==3.2
121
+ setuptools==75.8.0
122
+ args==0.1.0
123
+ certifi==2025.1.31
124
+ click==8.1.8
125
+ python-dotenv==1.1.0
126
+ laion_clap==1.1.5
127
+ Pygments==2.19.1
128
+ tomlkit==0.13.2
129
+ idna==3.7
130
+ propcache==0.3.1
131
+ platformdirs==4.3.7
132
+ align-anything==0.0.1.dev0
133
+ ray==2.44.1
134
+ cloudpickle==3.1.1
135
+ deepspeed==0.16.5
136
+ smmap==5.0.2
137
+ distro==1.9.0
138
+ fonttools==4.56.0
139
+ typing-inspection==0.4.0
140
+ braceexpand==0.1.7
141
+ decorator==5.2.1
142
+ diskcache==5.6.3
143
+ yt-dlp==2025.3.27
144
+ shtab==1.7.1
145
+ gguf==0.10.0
146
+ interegular==0.3.3
147
+ compressed-tensors==0.9.2
148
+ pandas==2.2.3
149
+ huggingface-hub==0.29.3
150
+ pyarrow==19.0.1
151
+ lm-format-enforcer==0.10.11
152
+ GitPython==3.1.44
153
+ xxhash==3.5.0
154
+ packaging==24.2
155
+ setproctitle==1.3.5
156
+ llvmlite==0.43.0
157
+ tiktoken==0.9.0
158
+ mpmath==1.3.0
159
+ email_validator==2.2.0
160
+ nvidia-ml-py==12.570.86
161
+ pydantic==2.11.0
162
+ xformers==0.0.29.post2
163
+ httptools==0.6.4
164
+ librosa==0.11.0
165
+ pytorch-fid==0.3.0
166
+ hpsv2==1.2.0
167
+ nvidia-cudnn-cu12==9.1.0.70
168
+ iniconfig==2.1.0
169
+ sympy==1.13.1
170
+ safehttpx==0.1.6
171
+ jsonschema-specifications==2024.10.1
172
+ Jinja2==3.1.6
173
+ tyro==0.8.14
174
+ h11==0.14.0
175
+ aiohttp==3.11.14
176
+ diffusers==0.32.2
177
+ tqdm==4.67.1
178
+ blake3==1.0.4
179
+ vllm==0.8.2
180
+ scipy==1.10.1
181
+ audioread==3.0.1
182
+ proglog==0.1.10
183
+ fire==0.7.0
184
+ sentencepiece==0.2.0
185
+ pytz==2025.2
186
+ nvidia-nvjitlink-cu12==12.4.127
187
+ nvidia-curand-cu12==10.3.5.147
188
+ numpy==1.26.4
189
+ tzdata==2025.2
190
+ python-multipart==0.0.20
191
+ urllib3==2.3.0
192
+ pycryptodomex==3.22.0
193
+ yarl==1.18.3
194
+ outlines==0.1.11
195
+ nvidia-cusolver-cu12==11.6.1.9
196
+ pydub==0.25.1
197
+ mistral_common==1.5.4
198
+ pycparser==2.22
199
+ pytest-split==0.8.0
200
+ datasets==3.4.1
201
+ soundfile==0.13.1
202
+ transformers==4.50.0
203
+ image-reward==1.5
204
+ wcwidth==0.2.13
205
+ nvidia-cuda-nvrtc-cu12==12.4.127
206
+ groovy==0.1.2
207
+ nvidia-cuda-runtime-cu12==12.4.127
208
+ astor==0.8.1
209
+ anyio==4.9.0
210
+ wandb==0.19.8
211
+ joblib==1.4.2
212
+ fsspec==2024.12.0
213
+ accelerate==1.5.2
214
+ py-cpuinfo==9.0.0
215
+ docstring_parser==0.16
216
+ partial-json-parser==0.2.1.1.post5
217
+ nvidia-cusparse-cu12==12.3.1.170
218
+ protobuf==3.20.3
219
+ outlines_core==0.1.26
220
+ nvidia-cufft-cu12==11.2.1.3
221
+ cycler==0.12.1
222
+ uvicorn==0.34.0
223
+ orjson==3.10.16
224
+ av==14.2.0
225
+ Brotli==1.0.9
226
+ cupy-cuda12x==13.4.1
227
+ openai==1.69.0
228
+ rich==13.9.4
229
+ importlib_metadata==8.6.1
230
+ ninja==1.11.1.4
231
+ wheel==0.45.1
232
+ pillow==10.4.0
233
+ prometheus_client==0.21.1
234
+ threadpoolctl==3.6.0
235
+ gitdb==4.0.12
236
+ watchfiles==1.0.4
237
+ nvidia-cuda-cupti-cu12==12.4.127
238
+ opencv-python-headless==4.11.0.86
239
+ mkl-service==2.4.0
240
+ rich-toolkit==0.14.0
241
+ fastapi-cli==0.0.7
242
+ llamafactory==0.9.3.dev0
243
+ typing_extensions==4.12.2
244
+ tomli==2.0.1
245
+ zipp==3.19.2
246
+ wheel==0.43.0
247
+ jaraco.text==3.12.1
248
+ packaging==24.2
249
+ autocommand==2.2.2
250
+ jaraco.functools==4.0.1
251
+ jaraco.collections==5.1.0
252
+ platformdirs==4.2.2
253
+ more-itertools==10.3.0
254
+ inflect==7.3.1
255
+ jaraco.context==5.3.0
256
+ typeguard==4.3.0
257
+ backports.tarfile==1.2.0
258
+ importlib_metadata==8.0.0
wandb/run-20250401_092824-bd2o51v8/files/wandb-metadata.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-1040-nvidia-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.11.11",
4
+ "startedAt": "2025-04-01T01:28:24.078287Z",
5
+ "args": [
6
+ "--local_rank=0",
7
+ "--model_name_or_path",
8
+ "/aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-7b-hf",
9
+ "--train_datasets",
10
+ "/aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-7b_cosi_cut/merged/top1-20",
11
+ "--train_template",
12
+ "AA_TI2T_LLAVA",
13
+ "--train_split",
14
+ "train",
15
+ "--train_name",
16
+ "text-image-to-text",
17
+ "--output_dir",
18
+ "../outputs/llava_1.6_vicuna_7B_cosi/top1-20",
19
+ "--save_total_limit",
20
+ "3",
21
+ "--train_batch_size",
22
+ "8",
23
+ "--epochs",
24
+ "3"
25
+ ],
26
+ "program": "-m align_anything.trainers.text_image_to_text.dpo",
27
+ "git": {
28
+ "remote": "git@github.com-hantao:PKU-Alignment/align-anything.git",
29
+ "commit": "106588f9802757a3283c1aff1f33ea9afd737f31"
30
+ },
31
+ "email": "2200017789@stu.pku.edu.cn",
32
+ "root": "../outputs/llava_1.6_vicuna_7B_cosi/top1-20",
33
+ "host": "dgx-075",
34
+ "executable": "/aifs4su/yaodong/miniconda3/envs/hantao_llama/bin/python",
35
+ "cpu_count": 112,
36
+ "cpu_count_logical": 224,
37
+ "gpu": "NVIDIA H800",
38
+ "gpu_count": 8,
39
+ "disk": {
40
+ "/": {
41
+ "total": "1888556142592",
42
+ "used": "44068241408"
43
+ }
44
+ },
45
+ "memory": {
46
+ "total": "2164195545088"
47
+ },
48
+ "cpu": {
49
+ "count": 112,
50
+ "countLogical": 224
51
+ },
52
+ "gpu_nvidia": [
53
+ {
54
+ "name": "NVIDIA H800",
55
+ "memoryTotal": "85520809984",
56
+ "cudaCores": 16896,
57
+ "architecture": "Hopper"
58
+ },
59
+ {
60
+ "name": "NVIDIA H800",
61
+ "memoryTotal": "85520809984",
62
+ "cudaCores": 16896,
63
+ "architecture": "Hopper"
64
+ },
65
+ {
66
+ "name": "NVIDIA H800",
67
+ "memoryTotal": "85520809984",
68
+ "cudaCores": 16896,
69
+ "architecture": "Hopper"
70
+ },
71
+ {
72
+ "name": "NVIDIA H800",
73
+ "memoryTotal": "85520809984",
74
+ "cudaCores": 16896,
75
+ "architecture": "Hopper"
76
+ },
77
+ {
78
+ "name": "NVIDIA H800",
79
+ "memoryTotal": "85520809984",
80
+ "cudaCores": 16896,
81
+ "architecture": "Hopper"
82
+ },
83
+ {
84
+ "name": "NVIDIA H800",
85
+ "memoryTotal": "85520809984",
86
+ "cudaCores": 16896,
87
+ "architecture": "Hopper"
88
+ },
89
+ {
90
+ "name": "NVIDIA H800",
91
+ "memoryTotal": "85520809984",
92
+ "cudaCores": 16896,
93
+ "architecture": "Hopper"
94
+ },
95
+ {
96
+ "name": "NVIDIA H800",
97
+ "memoryTotal": "85520809984",
98
+ "cudaCores": 16896,
99
+ "architecture": "Hopper"
100
+ }
101
+ ],
102
+ "slurm": {
103
+ "conf": "/cm/shared/apps/slurm/var/etc/slurm/slurm.conf"
104
+ },
105
+ "cudaVersion": "12.2"
106
+ }
wandb/run-20250401_092824-bd2o51v8/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train/reward":-14.6875,"train/better_sample_reward":-2.265625,"train/lr":0,"_timestamp":1.7434783858305552e+09,"train/step":2919,"train/worse_sample_reward":-12.4375,"train/reward_margin":10.1875,"_runtime":7523.179692948,"_wandb":{"runtime":7523},"train/loss":0.00011682510375976562,"_step":2919,"train/epoch":3,"train/reward_accuracy":1}
wandb/run-20250401_092824-bd2o51v8/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-04-01T09:28:23.49254425+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpk169qwgv/port-516629.txt","pid":516629,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
+ {"time":"2025-04-01T09:28:23.493893758+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":516629}
3
+ {"time":"2025-04-01T09:28:23.493865904+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":38869,"Zone":""}}
4
+ {"time":"2025-04-01T09:28:23.644131169+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51766"}
5
+ {"time":"2025-04-01T09:28:24.079737159+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"bd2o51v8","id":"127.0.0.1:51766"}
6
+ {"time":"2025-04-01T09:28:24.302760102+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"bd2o51v8","id":"127.0.0.1:51766"}
7
+ {"time":"2025-04-01T11:33:48.724033921+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51766"}
8
+ {"time":"2025-04-01T11:33:48.724433364+08:00","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51766"}
9
+ {"time":"2025-04-01T11:33:48.724466682+08:00","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51766"}
10
+ {"time":"2025-04-01T11:33:48.724456672+08:00","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2025-04-01T11:33:48.895059085+08:00","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:38869->127.0.0.1:51766: use of closed network connection","id":"127.0.0.1:51766"}
12
+ {"time":"2025-04-01T11:33:48.895255575+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51766"}
13
+ {"time":"2025-04-01T11:33:48.895267229+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51766"}
14
+ {"time":"2025-04-01T11:33:48.895272478+08:00","level":"INFO","msg":"server is closed"}
wandb/run-20250401_092824-bd2o51v8/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-04-01T09:28:24.081265347+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"../outputs/llava_1.6_vicuna_7B_cosi/top1-20/wandb/run-20250401_092824-bd2o51v8/logs/debug-core.log"}
2
+ {"time":"2025-04-01T09:28:24.302680251+08:00","level":"INFO","msg":"created new stream","id":"bd2o51v8"}
3
+ {"time":"2025-04-01T09:28:24.302754002+08:00","level":"INFO","msg":"stream: started","id":"bd2o51v8"}
4
+ {"time":"2025-04-01T09:28:24.302817732+08:00","level":"INFO","msg":"handler: started","stream_id":"bd2o51v8"}
5
+ {"time":"2025-04-01T09:28:24.302840797+08:00","level":"INFO","msg":"sender: started","stream_id":"bd2o51v8"}
6
+ {"time":"2025-04-01T09:28:24.302883549+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"bd2o51v8"}
7
+ {"time":"2025-04-01T09:28:24.717791905+08:00","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-04-01T11:33:47.258061087+08:00","level":"INFO","msg":"Stopping system monitor"}
9
+ {"time":"2025-04-01T11:33:47.265278587+08:00","level":"INFO","msg":"Stopped system monitor"}
10
+ {"time":"2025-04-01T11:33:48.674037127+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
11
+ {"time":"2025-04-01T11:33:48.72438635+08:00","level":"INFO","msg":"stream: closing","id":"bd2o51v8"}
12
+ {"time":"2025-04-01T11:33:48.724423251+08:00","level":"WARN","msg":"sender: received Exit record more than once, ignoring"}
13
+ {"time":"2025-04-01T11:33:48.894912192+08:00","level":"INFO","msg":"handler: closed","stream_id":"bd2o51v8"}
14
+ {"time":"2025-04-01T11:33:48.89495363+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"bd2o51v8"}
15
+ {"time":"2025-04-01T11:33:48.895096718+08:00","level":"INFO","msg":"sender: closed","stream_id":"bd2o51v8"}
16
+ {"time":"2025-04-01T11:33:48.895192408+08:00","level":"INFO","msg":"stream: closed","id":"bd2o51v8"}