#!/bin/bash
# SLURM job script: warmup training for LAVT retrieval on RefCOCO.
# Submit with: sbatch <this-file>
#SBATCH --job-name=lavt_ccr                          # Submit a job named "lavt_ccr"
#SBATCH --mail-user=vip.maildummy@gmail.com
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --partition=a100                             # a6000 or a100
#SBATCH --gres=gpu:2
#SBATCH --time=7-00:00:00                            # d-hh:mm:ss, max time limit
#SBATCH --mem=84000                                  # cpu memory size
#SBATCH --cpus-per-task=8                            # cpu num
#SBATCH --output=log_retrieval_refcoco_lavt_one.txt  # std output filename

ml cuda/11.0                     # Load the required CUDA version
eval "$(conda shell.bash hook)"  # Initialize Conda Environment
conda activate lavt              # Activate your conda environment

# train
# mkdir ./models
# mkdir ./models/refcoco
# CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node 4 --master_port 12345 train_mosaic.py --model lavt --dataset refcoco --model_id refcoco --batch-size 8 --lr 0.00005 --wd 1e-2 --swin_type base --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth --epochs 40 --img_size 480 2>&1 | tee ./models/refcoco/output

# warmup
# -p: do not fail if the experiment directory already exists (e.g. on resubmit)
mkdir -p ./experiments/retrieval_refcoco_433_10wm6to3
srun python -m torch.distributed.launch --nproc_per_node 2 --master_port 8247 \
  train_mosaic_retrieval_warmup.py \
  --model lavt_one \
  --dataset refcoco \
  --model_id retrieval_refcoco_433_10wm6to3 \
  --resume ./checkpoints/model_best_retrieval_refcoco_433_10wm6to3.pth \
  --batch-size 12 \
  --lr 0.00005 \
  --wd 1e-2 \
  --swin_type base \
  --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth \
  --epochs 50 \
  --img_size 480 \
  --config config/retrieval_433_10wm6to3.yaml \
  2>&1 | tee ./experiments/retrieval_refcoco_433_10wm6to3/log2.txt