# Usage
```shell
# accelerate launch --config_file amlt_configs/accelerate_deepspeed_config.local.yaml \
python \
-m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca_multitask_v2 \
model.cache_dir=.model.cache/ \
training.do_train=True \
training.do_eval=True \
training.fp16=True \
training.num_masks_per_sample=16 \
training.per_device_train_batch_size=1 \
training.dataloader_num_workers=4 \
training.max_steps=99 \
training.logging_first_step=True \
training.logging_steps=5 \
training.evaluate_before_train=True \
training.max_eval_samples=3 \
training.eval_steps=50 \
training.save_steps=50 \
wandb.log=False \
training.lr_scheduler_type=cosine \
+data_transforms=lsj-0_1-2_0 \
model.lm_head_model_name_or_path=gpt2 \
model.sam_model_name_or_path=facebook/sam-vit-base
# model.lm_head_model_name_or_path=openlm-research/open_llama_3b_v2
# To use llama, you need to install sentencepiece.
# training.gradient_checkpointing=true
# Pass extra args into the data module, e.g.:
# train_data_overrides='[data.streaming\=True]'
```
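For multi-GPU runs, the commented `accelerate launch` line above replaces the plain `python` entry point. A minimal sketch, assuming `amlt_configs/accelerate_deepspeed_config.local.yaml` is the DeepSpeed config referenced in the comment and that your `accelerate` version supports `--module`:
```shell
# Sketch: same Hydra overrides, launched through accelerate + DeepSpeed instead of plain python.
accelerate launch --config_file amlt_configs/accelerate_deepspeed_config.local.yaml \
--module src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca_multitask_v2 \
training.do_train=True \
training.fp16=True
```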
## SCA
Training:
```shell
python \
-m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca \
training.do_train=True \
training.do_eval=True \
training.num_masks_per_sample=32 \
+data.streaming=False \
training.per_device_train_batch_size=1 \
training.fp16=True \
training.dataloader_num_workers=4 \
training.logging_first_step=True \
training.trainable_params='[mask_decoder.additional_transformer,mask_decoder.caption_tokens,task_tokens,language_project,language_model]' \
+training.custom_param_lrs='{language_model:1e-5}' \
training.compute_metrics=null
# training.num_masks_per_sample=10
# training.num_masks_per_sample=4
# model.lm_head_model_name_or_path=gpt2-large
# model.lm_head_model_name_or_path=gpt2-xl
# training.compute_metrics: compute METEOR during training. If true, it uses generate
# (about 0.4 it/s on A100); if false or null, it only computes the loss (about 1.5 it/s).
```
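To compute METEOR during training (the slower path from the comment above, since it calls `generate`), flip the flag. A sketch, assuming `training.compute_metrics` accepts a boolean as the comment suggests:
```shell
# Sketch: generation-based eval metrics (~0.4 it/s on A100 per the note above).
python -m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca \
training.do_train=True training.do_eval=True \
training.compute_metrics=True
```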
Inference:
```shell
python \
-m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca \
training.do_train=False \
training.do_eval=False \
training.do_inference=True \
training.output_dir=amlt/train-sca-vg_densecap-081023/gpt2-large/ \
wandb.log=False \
model.model_name_or_path=amlt/train-sca-vg_densecap-081023/gpt2-large/checkpoint-9000
# training.fp16_full_eval=True
# FIXME: when loading weights from an existing SCA model, use the same tokenizer as that model:
# model.lm_head_model_name_or_path=$(grep lm_head_model_name_or_path $AMLT_MAP_INPUT_DIR/.hydra/config.yaml | tail -n1 | sed 's/ *//g' | cut -d ':' -f2)
# model.sam_model_name_or_path=$(grep sam_model_name_or_path $AMLT_MAP_INPUT_DIR/.hydra/config.yaml | tail -n1 | sed 's/ *//g' | cut -d ':' -f2)
```
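The commented `grep` lines above can be folded into a small wrapper so inference reuses the tokenizer recorded at training time. A sketch, assuming the run saved its Hydra config under `<output_dir>/.hydra/config.yaml`; `CKPT_ROOT` is a hypothetical variable standing in for `$AMLT_MAP_INPUT_DIR`:
```shell
# Sketch: recover the model/tokenizer names recorded at training time, then run inference.
CKPT_ROOT=amlt/train-sca-vg_densecap-081023/gpt2-large  # hypothetical run dir with .hydra/config.yaml
LM_HEAD=$(grep lm_head_model_name_or_path $CKPT_ROOT/.hydra/config.yaml | tail -n1 | sed 's/ *//g' | cut -d ':' -f2)
SAM=$(grep sam_model_name_or_path $CKPT_ROOT/.hydra/config.yaml | tail -n1 | sed 's/ *//g' | cut -d ':' -f2)
python -m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca \
training.do_train=False training.do_eval=False training.do_inference=True \
training.output_dir=$CKPT_ROOT/ \
model.model_name_or_path=$CKPT_ROOT/checkpoint-9000 \
model.lm_head_model_name_or_path=$LM_HEAD \
model.sam_model_name_or_path=$SAM
```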
## Data Configs
```shell
src/conf/data
├── coco_caption-pseudo_region.yaml
├── coco-instance-local.yaml
├── coco-instance-task_type_caption-local.yaml
├── coco-instance-task_type_caption.yaml
├── coco-instance.yaml
├── objects365-local.yaml
├── objects365-task_type_caption-local.yaml
├── refclef-berkeley.yaml
├── refclef-unc.yaml
├── refcocog-google.yaml
├── refcoco-google.yaml
├── refcocog-umd.yaml
├── refcoco+-unc-split_testA.yaml
├── refcoco-unc-split_testA.yaml
├── refcoco+-unc-split_testB.yaml
├── refcoco-unc-split_testB.yaml
├── refcoco+-unc.yaml
├── refcoco-unc.yaml
├── sa1b-cap-streaming-hard_code_filter-num_tars_11.yaml
├── sa1b-cap-streaming-hard_code_filter-num_tars_2.yaml
├── sa1b-cap-streaming-hard_code_filter-num_tars_6.yaml
├── sa1b-cap-streaming-num_tars_11.yaml
├── sa1b-cap-streaming-num_tars_2.yaml
├── sa1b-cap-streaming-num_tars_6.yaml
├── sa1b-cap-streaming.yaml
├── sbu-pseudo_region-local.yaml
├── sbu-pseudo_region.yaml
├── v3det-local.yaml
├── v3det-task_type_caption-local.yaml
├── vg-densecap-local.yaml
├── vg-densecap-mask_region_descriptions.yaml
├── vg-densecap-region_descriptions.yaml
├── vg_densecap.yaml
├── vg-full-vg-densecap-mask_region_descriptions.yaml
├── vg-full-vg-densecap-region_descriptions.yaml
└── vg-grit-local.yaml
```
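Each filename (without `.yaml`) is a value you can pass to `train_data` / `eval_data` in the commands above. A sketch, assuming the bracketed list syntax accepts multiple entries so datasets can be mixed (an assumption suggested by the list form `train_data='[...]'`):
```shell
# Sketch: each list entry maps to src/conf/data/<name>.yaml.
python -m src.train \
train_data='[vg-densecap-local,coco-instance-local]' \
eval_data='[vg-densecap-local]' \
+model=base_sca \
training.do_train=True
```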
## Debug
Use the VS Code debugger; the config is in `.vscode/launch.json`.
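For reference, an attach entry in `.vscode/launch.json` typically looks like the sketch below; the exact entry in this repo may differ, but the port must match the `--listen` port of the `debugpy` command that follows (5678 here):
```json
{
    "name": "Attach to src.train (sketch)",
    "type": "python",
    "request": "attach",
    "connect": { "host": "localhost", "port": 5678 }
}
```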
| ```shell | |
| python -m debugpy --wait-for-client --listen 0.0.0.0:5678 \ | |
| -m src.train \ | |
| train_data='[vg-densecap-region_descriptions]' eval_data='[vg-densecap-region_descriptions]' \ | |
| +model=base_sam_captioner \ | |
| training.do_train=True \ | |
| training.do_eval=True \ | |
| training.num_masks_per_sample=6 \ | |
| +data.streaming=False \ | |
| # sample | |
| training.max_eval_samples=1 \ | |
| training.max_train_samples=1 \ | |
| # logging training step | |
| training.logging_steps=5 \ | |
| # eval | |
| training.evaluation_strategy=steps \ | |
| training.eval_steps=5 \ | |
| # num_stape | |
| training.max_steps=1000 \ | |
| # save model | |
| training.save_strategy=steps \ | |
| training.save_steps=10 \ | |
| training.save_total_limit=2 \ | |
| # optimizer | |
| training.optim=adamw_torch | |
| training.learning_rate=5e-5 | |
| # wandb | |
| wandb.log=False | |
| wandb.project=sca | |
| wandb.group=debug | |
| wandb.name=sca-debug | |
| # test | |
| training.evaluate_before_train=False \ | |
| # Set log_level in `transformer` to `info`. By default, it is `warning`. | |
| # debug - 10; info - 20; warning - 30; error - 40; critical - 50; | |
| # by default, it is `passive` which is 30. | |
| training.log_level="info" | |
| # Set log_level=DEBUG in my loggers controlled by hydra. | |
| hydra.verbose=true | |
| ``` | |
## About Wandb Resume
We save the wandb run id to `wandb_id` inside `training.output_dir`. Therefore, a different `output_dir` yields a different wandb run id, while reusing the same `output_dir` resumes the same run.
- Reference: https://github.com/wandb/wandb/issues/335#issuecomment-493284910
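A minimal sketch of how this plays out, assuming a previous run wrote to the directory below:
```shell
# The saved run id lives next to the checkpoints:
cat amlt/train-sca-vg_densecap-081023/gpt2-large/wandb_id
# Re-launching with the same output_dir picks up that id and resumes the wandb run;
# pointing output_dir elsewhere starts a fresh run.
python -m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca \
training.do_train=True \
training.output_dir=amlt/train-sca-vg_densecap-081023/gpt2-large/
```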