htlou commited on
Commit
f5d339e
·
verified ·
1 Parent(s): 9cf3038

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. debug-internal.log +7 -7
  2. debug.log +26 -24
debug-internal.log CHANGED
@@ -1,7 +1,7 @@
1
- {"time":"2025-03-29T01:22:05.252520599+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"../outputs/test_7B/wandb/run-20250329_012205-co1ecmky/logs/debug-core.log"}
2
- {"time":"2025-03-29T01:22:05.467254306+08:00","level":"INFO","msg":"created new stream","id":"co1ecmky"}
3
- {"time":"2025-03-29T01:22:05.467309548+08:00","level":"INFO","msg":"stream: started","id":"co1ecmky"}
4
- {"time":"2025-03-29T01:22:05.467331707+08:00","level":"INFO","msg":"handler: started","stream_id":"co1ecmky"}
5
- {"time":"2025-03-29T01:22:05.467333162+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"co1ecmky"}
6
- {"time":"2025-03-29T01:22:05.467336174+08:00","level":"INFO","msg":"sender: started","stream_id":"co1ecmky"}
7
- {"time":"2025-03-29T01:22:05.772490021+08:00","level":"INFO","msg":"Starting system monitor"}
 
1
+ {"time":"2025-03-28T20:38:44.692258573+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.8","symlink path":"../outputs/test_13B/wandb/offline-run-20250328_203844-gc2qytpj/logs/debug-core.log"}
2
+ {"time":"2025-03-28T20:38:44.90653769+08:00","level":"INFO","msg":"created new stream","id":"gc2qytpj"}
3
+ {"time":"2025-03-28T20:38:44.90660149+08:00","level":"INFO","msg":"stream: started","id":"gc2qytpj"}
4
+ {"time":"2025-03-28T20:38:44.906615038+08:00","level":"INFO","msg":"handler: started","stream_id":"gc2qytpj"}
5
+ {"time":"2025-03-28T20:38:44.906633311+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"gc2qytpj"}
6
+ {"time":"2025-03-28T20:38:44.90663431+08:00","level":"INFO","msg":"sender: started","stream_id":"gc2qytpj"}
7
+ {"time":"2025-03-28T20:38:44.910439284+08:00","level":"INFO","msg":"Starting system monitor"}
debug.log CHANGED
@@ -1,24 +1,26 @@
1
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_setup.py:_flush():67] Current SDK version is 0.19.8
2
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_setup.py:_flush():67] Configure stats pid to 104999
3
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_setup.py:_flush():67] Loading settings from /home/yangyaodong/.config/wandb/settings
4
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_setup.py:_flush():67] Loading settings from /aifs4su/yaodong/hantao/align-anything/scripts/wandb/settings
5
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_setup.py:_flush():67] Loading settings from environment variables
6
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_init.py:setup_run_log_directory():647] Logging user logs to ../outputs/test_7B/wandb/run-20250329_012205-co1ecmky/logs/debug.log
7
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_init.py:setup_run_log_directory():648] Logging internal logs to ../outputs/test_7B/wandb/run-20250329_012205-co1ecmky/logs/debug-internal.log
8
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_init.py:init():761] calling init triggers
9
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_init.py:init():766] wandb.init called with sweep_config: {}
10
- config: {'train_cfgs': {'save_checkpoint': False, 'load_checkpoint': False, 'ds_cfgs': 'ds_z3_config.json', 'epochs': 3, 'seed': 42, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 16, 'gradient_checkpointing': True, 'learning_rate': 2e-05, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.0, 'adam_betas': [0.9, 0.95], 'adam_epsilon': 1e-08, 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'freeze_mm_proj': False, 'freeze_vision_tower': True, 'freeze_language_model': False, 'max_grad_norm': 1.0}, 'data_cfgs': {'load_multi_datasets': False, 'train_datasets': '/aifs4su/yaodong/hantao/datasets/MMInstruct-GPT4V_mistral-7b_cosi_cut/merged/top1-10', 'train_template': 'MM_TI2T_LLAVA', 'train_size': {}, 'train_split': 'train', 'train_name': 'text-image-to-text', 'train_data_files': {}, 'train_optional_args': [], 'eval_datasets': {}, 'eval_template': {}, 'eval_name': {}, 'eval_size': {}, 'eval_split': {}, 'eval_subset': {}, 'eval_data_files': {}, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'sft', 'output_dir': '../outputs/test_7B', 'cache_dir': {}, 'save_total_limit': 6}, 'model_cfgs': {'model_name_or_path': '/aifs4su/yaodong/hantao/models/llava-v1.6-mistral-7b-hf', 'trust_remote_code': True, 'model_max_length': 2048}, 'special_tokens': {}, '_wandb': {}}
11
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_init.py:init():784] starting backend
12
- 2025-03-29 01:22:05,246 INFO MainThread:104999 [wandb_init.py:init():788] sending inform_init request
13
- 2025-03-29 01:22:05,249 INFO MainThread:104999 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
- 2025-03-29 01:22:05,249 INFO MainThread:104999 [wandb_init.py:init():798] backend started and connected
15
- 2025-03-29 01:22:05,251 INFO MainThread:104999 [wandb_init.py:init():891] updated telemetry
16
- 2025-03-29 01:22:05,262 INFO MainThread:104999 [wandb_init.py:init():915] communicating run to backend with 90.0 second timeout
17
- 2025-03-29 01:22:05,770 INFO MainThread:104999 [wandb_init.py:init():990] starting run threads in backend
18
- 2025-03-29 01:22:05,989 INFO MainThread:104999 [wandb_run.py:_console_start():2375] atexit reg
19
- 2025-03-29 01:22:05,990 INFO MainThread:104999 [wandb_run.py:_redirect():2227] redirect: wrap_raw
20
- 2025-03-29 01:22:05,990 INFO MainThread:104999 [wandb_run.py:_redirect():2292] Wrapping output streams.
21
- 2025-03-29 01:22:05,990 INFO MainThread:104999 [wandb_run.py:_redirect():2315] Redirects installed.
22
- 2025-03-29 01:22:05,992 INFO MainThread:104999 [wandb_init.py:init():1032] run started, returning control to user process
23
- 2025-03-29 01:26:51,278 INFO MainThread:104999 [wandb_run.py:_finish():2112] finishing run htlou/align-anything/co1ecmky
24
- 2025-03-29 01:26:51,278 INFO MainThread:104999 [wandb_run.py:_atexit_cleanup():2340] got exitcode: 0
 
 
 
1
+ 2025-03-28 20:38:44,447 INFO MainThread:3871311 [wandb_setup.py:_flush():67] Current SDK version is 0.19.8
2
+ 2025-03-28 20:38:44,447 INFO MainThread:3871311 [wandb_setup.py:_flush():67] Configure stats pid to 3871311
3
+ 2025-03-28 20:38:44,447 INFO MainThread:3871311 [wandb_setup.py:_flush():67] Loading settings from /home/yangyaodong/.config/wandb/settings
4
+ 2025-03-28 20:38:44,447 INFO MainThread:3871311 [wandb_setup.py:_flush():67] Loading settings from /aifs4su/yaodong/hantao/align-anything/scripts/wandb/settings
5
+ 2025-03-28 20:38:44,447 INFO MainThread:3871311 [wandb_setup.py:_flush():67] Loading settings from environment variables
6
+ 2025-03-28 20:38:44,447 INFO MainThread:3871311 [wandb_init.py:setup_run_log_directory():647] Logging user logs to ../outputs/test_13B/wandb/offline-run-20250328_203844-gc2qytpj/logs/debug.log
7
+ 2025-03-28 20:38:44,447 INFO MainThread:3871311 [wandb_init.py:setup_run_log_directory():648] Logging internal logs to ../outputs/test_13B/wandb/offline-run-20250328_203844-gc2qytpj/logs/debug-internal.log
8
+ 2025-03-28 20:38:44,447 INFO MainThread:3871311 [wandb_init.py:init():761] calling init triggers
9
+ 2025-03-28 20:38:44,447 INFO MainThread:3871311 [wandb_init.py:init():766] wandb.init called with sweep_config: {}
10
+ config: {'train_cfgs': {'save_checkpoint': True, 'load_checkpoint': False, 'ds_cfgs': 'ds_z3_config.json', 'epochs': 3, 'seed': 42, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 1, 'gradient_checkpointing': True, 'learning_rate': 1e-06, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.0, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': False, 'freeze_vision_tower': True, 'freeze_language_model': False}, 'data_cfgs': {'train_datasets': '/aifs4su/yaodong/hantao/datasets/AA_preference_vicuna-13b_cosi_cut/merged/top1-10', 'train_template': 'AA_TI2T_LLAVA', 'train_size': {}, 'train_split': 'train', 'train_name': 'text-image-to-text', 'train_data_files': {}, 'train_optional_args': [], 'eval_datasets': {}, 'eval_template': {}, 'eval_size': {}, 'eval_split': {}, 'eval_subset': {}, 'eval_data_files': {}, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '../outputs/test_13B', 'cache_dir': {}, 'save_total_limit': 6}, 'model_cfgs': {'model_name_or_path': '/aifs4su/yaodong/hantao/models/llava-v1.6-vicuna-13b-hf', 'trust_remote_code': True, 'model_max_length': 2048}, 'special_tokens': {}, '_wandb': {}}
11
+ 2025-03-28 20:38:44,447 INFO MainThread:3871311 [wandb_init.py:init():784] starting backend
12
+ 2025-03-28 20:38:44,657 INFO MainThread:3871311 [wandb_init.py:init():788] sending inform_init request
13
+ 2025-03-28 20:38:44,660 INFO MainThread:3871311 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-03-28 20:38:44,660 INFO MainThread:3871311 [wandb_init.py:init():798] backend started and connected
15
+ 2025-03-28 20:38:44,662 INFO MainThread:3871311 [wandb_init.py:init():891] updated telemetry
16
+ 2025-03-28 20:38:44,672 INFO MainThread:3871311 [wandb_init.py:init():915] communicating run to backend with 90.0 second timeout
17
+ 2025-03-28 20:38:44,908 INFO MainThread:3871311 [wandb_init.py:init():990] starting run threads in backend
18
+ 2025-03-28 20:38:45,071 INFO MainThread:3871311 [wandb_run.py:_console_start():2375] atexit reg
19
+ 2025-03-28 20:38:45,071 INFO MainThread:3871311 [wandb_run.py:_redirect():2227] redirect: wrap_raw
20
+ 2025-03-28 20:38:45,071 INFO MainThread:3871311 [wandb_run.py:_redirect():2292] Wrapping output streams.
21
+ 2025-03-28 20:38:45,071 INFO MainThread:3871311 [wandb_run.py:_redirect():2315] Redirects installed.
22
+ 2025-03-28 20:38:45,074 INFO MainThread:3871311 [wandb_init.py:init():1032] run started, returning control to user process
23
+ 2025-03-28 20:47:40,230 INFO MainThread:3871311 [wandb_run.py:_finish():2112] finishing run align-anything/gc2qytpj
24
+ 2025-03-28 20:47:40,230 INFO MainThread:3871311 [wandb_run.py:_atexit_cleanup():2340] got exitcode: 0
25
+ 2025-03-28 20:47:40,230 INFO MainThread:3871311 [wandb_run.py:_restore():2322] restore
26
+ 2025-03-28 20:47:40,230 INFO MainThread:3871311 [wandb_run.py:_restore():2328] restore done