MODEL_DIR=./output

BASE_L_MODEL=cardiffnlp/twitter-roberta-base
BASE_LL_MODEL=cardiffnlp/twitter-roberta-base-emoji
# BASE_LL_MODEL=cardiffnlp/twitter-roberta-base-emotion
# BASE_LL_MODEL=cardiffnlp/twitter-roberta-base-sentiment
# BASE_L_MODEL=roberta-base
# BASE_L_MODEL=bert-large-cased

BASE_V_MODEL=openai/clip-vit-base-patch32
# BASE_V_MODEL=google/vit-base-patch16-384
# BASE_V_MODEL=google/vit-base-patch32-384
# BASE_V_MODEL=timm/vit_huge_patch14_224_in21k

# USE_TORCH=false
python run_hybrid_clip.py \
    --output_dir ${MODEL_DIR} \
    --text_model_name_or_path=${BASE_LL_MODEL} \
    --vision_model_name_or_path=${BASE_V_MODEL} \
    --tokenizer_name=${BASE_L_MODEL} \
    --train_file="/home/ceyda/data/train.json" \
    --validation_file="/home/ceyda/data/val.json" \
    --do_train --do_eval \
    --num_train_epochs="40" --max_seq_length 128 \
    --per_device_train_batch_size="32" \
    --per_device_eval_batch_size="8" \
    --learning_rate="1e-5" --warmup_steps="150" --weight_decay 0.1 \
    --overwrite_output_dir \
    --preprocessing_num_workers 32
    # --push_to_hub
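
# A minimal sketch of the data layout this command assumes: run_hybrid_clip.py
# reads --train_file / --validation_file as JSON lines, one example per line
# pairing an image path with its captions. The field names ("image_path",
# "captions"), the sample output path, and the image paths below are
# illustrative assumptions, not taken from the actual dataset.
cat > ./sample_train.json <<'EOF'
{"image_path": "/path/to/images/0001.jpg", "captions": ["a first example caption"]}
{"image_path": "/path/to/images/0002.jpg", "captions": ["a second example caption"]}
EOF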