Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu.yaml +35 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_a.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_r.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_real.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_sketch.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenetv2.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_a.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_r.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_real.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_sketch.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenetv2.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_a.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_r.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_sketch.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenetv2.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_a.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_r.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_real.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_sketch.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenetv2.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_a.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_r.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_sketch.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenetv2.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_a.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_r.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_real.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_sketch.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenetv2.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu.yaml +35 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_r.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_real.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_sketch.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenetv2.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_r.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_real.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_sketch.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenetv2.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_a.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_real.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_sketch.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenetv2.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu.yaml +36 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_a.yaml +37 -0
- VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_r.yaml +37 -0
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 5 |
+
DATA_PATH: './data/imagenet-1k'
|
| 6 |
+
MODEL:
|
| 7 |
+
TYPE: intern_vit_6b
|
| 8 |
+
DROP_PATH_RATE: 0.0
|
| 9 |
+
INTERN_VIT_6B:
|
| 10 |
+
FREEZE_VIT: True
|
| 11 |
+
PATCH_SIZE: 14
|
| 12 |
+
PRETRAIN_SIZE: 224
|
| 13 |
+
QKV_BIAS: False
|
| 14 |
+
EMBED_DIM: 3200
|
| 15 |
+
NUM_HEADS: 25
|
| 16 |
+
MLP_RATIO: 4
|
| 17 |
+
INIT_VALUES: 0.1
|
| 18 |
+
QK_NORMALIZATION: True
|
| 19 |
+
DEPTH: 48
|
| 20 |
+
USE_FLASH_ATTN: True
|
| 21 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 22 |
+
CLS_TARGET: 'attention_pooling'
|
| 23 |
+
TRAIN:
|
| 24 |
+
EMA:
|
| 25 |
+
ENABLE: True
|
| 26 |
+
DECAY: 0.998
|
| 27 |
+
EPOCHS: 10
|
| 28 |
+
WARMUP_EPOCHS: 1
|
| 29 |
+
WEIGHT_DECAY: 0.0
|
| 30 |
+
BASE_LR: 0.1 # 512
|
| 31 |
+
WARMUP_LR: .0
|
| 32 |
+
MIN_LR: .0
|
| 33 |
+
LR_LAYER_DECAY: false
|
| 34 |
+
OPTIMIZER:
|
| 35 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_a.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_a'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-a'
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 224
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 48
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 23 |
+
CLS_TARGET: 'attention_pooling'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_r.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_r'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-r'
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 224
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 48
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 23 |
+
CLS_TARGET: 'attention_pooling'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_real.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet-real'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-1k'
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 224
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 48
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 23 |
+
CLS_TARGET: 'attention_pooling'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_sketch.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_sketch'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-sketch'
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 224
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 48
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 23 |
+
CLS_TARGET: 'attention_pooling'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenetv2.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenetv2'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenetv2'
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 224
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 48
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 23 |
+
CLS_TARGET: 'attention_pooling'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_a.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_a'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-a'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 224
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 48
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_r.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_r'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-r'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 224
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 48
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_real.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet-real'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-1k'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 224
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 48
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_sketch.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_sketch'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-sketch'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 224
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 48
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenetv2.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenetv2'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenetv2'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 224
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 48
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 5 |
+
DATA_PATH: './data/imagenet-1k'
|
| 6 |
+
IMG_SIZE: 448
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 448
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 45
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
|
| 23 |
+
CLS_TARGET: 'attention_pooling'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_a.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_a'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-a'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_r.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_r'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-r'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_sketch.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_sketch'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-sketch'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenetv2.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenetv2'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenetv2'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 5 |
+
DATA_PATH: './data/imagenet-1k'
|
| 6 |
+
IMG_SIZE: 448
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 448
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 45
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
|
| 23 |
+
CLS_TARGET: 'attention_pooling'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_a.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_a'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-a'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_r.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_r'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-r'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_real.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet-real'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-1k'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_sketch.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_sketch'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-sketch'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenetv2.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenetv2'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenetv2'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 5 |
+
DATA_PATH: './data/imagenet-1k'
|
| 6 |
+
IMG_SIZE: 448
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 448
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 45
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_5.pth"
|
| 23 |
+
CLS_TARGET: 'attention_pooling'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_a.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_a'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-a'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_5.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_r.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_r'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-r'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_5.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_sketch.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_sketch'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-sketch'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_5.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenetv2.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenetv2'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenetv2'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_5.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 5 |
+
DATA_PATH: './data/imagenet-1k'
|
| 6 |
+
IMG_SIZE: 448
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 448
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 45
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
|
| 23 |
+
CLS_TARGET: 'attention_pooling'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_a.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_a'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-a'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_r.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_r'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-r'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_real.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet-real'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-1k'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_sketch.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_sketch'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-sketch'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenetv2.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenetv2'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenetv2'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
|
| 24 |
+
CLS_TARGET: 'attention_pooling'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 5 |
+
DATA_PATH: './data/imagenet-1k'
|
| 6 |
+
MODEL:
|
| 7 |
+
TYPE: intern_vit_6b
|
| 8 |
+
DROP_PATH_RATE: 0.0
|
| 9 |
+
INTERN_VIT_6B:
|
| 10 |
+
FREEZE_VIT: True
|
| 11 |
+
PATCH_SIZE: 14
|
| 12 |
+
PRETRAIN_SIZE: 224
|
| 13 |
+
QKV_BIAS: False
|
| 14 |
+
EMBED_DIM: 3200
|
| 15 |
+
NUM_HEADS: 25
|
| 16 |
+
MLP_RATIO: 4
|
| 17 |
+
INIT_VALUES: 0.1
|
| 18 |
+
QK_NORMALIZATION: True
|
| 19 |
+
DEPTH: 48
|
| 20 |
+
USE_FLASH_ATTN: True
|
| 21 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 22 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 23 |
+
TRAIN:
|
| 24 |
+
EMA:
|
| 25 |
+
ENABLE: True
|
| 26 |
+
DECAY: 0.998
|
| 27 |
+
EPOCHS: 10
|
| 28 |
+
WARMUP_EPOCHS: 1
|
| 29 |
+
WEIGHT_DECAY: 0.0
|
| 30 |
+
BASE_LR: 0.1 # 512
|
| 31 |
+
WARMUP_LR: .0
|
| 32 |
+
MIN_LR: .0
|
| 33 |
+
LR_LAYER_DECAY: false
|
| 34 |
+
OPTIMIZER:
|
| 35 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_r.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_r'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-r'
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 224
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 48
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 23 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_real.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet-real'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-1k'
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 224
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 48
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 23 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_sketch.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_sketch'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-sketch'
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 224
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 48
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 23 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenetv2.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenetv2'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenetv2'
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 224
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 48
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 23 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_r.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_r'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-r'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 224
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 48
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 24 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_real.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet-real'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-1k'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 224
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 48
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 24 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_sketch.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_sketch'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-sketch'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 224
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 48
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 24 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenetv2.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenetv2'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenetv2'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 224
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 48
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
|
| 24 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 5 |
+
DATA_PATH: './data/imagenet-1k'
|
| 6 |
+
IMG_SIZE: 448
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 448
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 45
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
|
| 23 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_a.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_a'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-a'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
|
| 24 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_real.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet-real'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-1k'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
|
| 24 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_sketch.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_sketch'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-sketch'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
|
| 24 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenetv2.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenetv2'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenetv2'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
|
| 24 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 5 |
+
DATA_PATH: './data/imagenet-1k'
|
| 6 |
+
IMG_SIZE: 448
|
| 7 |
+
MODEL:
|
| 8 |
+
TYPE: intern_vit_6b
|
| 9 |
+
DROP_PATH_RATE: 0.0
|
| 10 |
+
INTERN_VIT_6B:
|
| 11 |
+
FREEZE_VIT: True
|
| 12 |
+
PATCH_SIZE: 14
|
| 13 |
+
PRETRAIN_SIZE: 448
|
| 14 |
+
QKV_BIAS: False
|
| 15 |
+
EMBED_DIM: 3200
|
| 16 |
+
NUM_HEADS: 25
|
| 17 |
+
MLP_RATIO: 4
|
| 18 |
+
INIT_VALUES: 0.1
|
| 19 |
+
QK_NORMALIZATION: True
|
| 20 |
+
DEPTH: 45
|
| 21 |
+
USE_FLASH_ATTN: True
|
| 22 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
|
| 23 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 24 |
+
TRAIN:
|
| 25 |
+
EMA:
|
| 26 |
+
ENABLE: True
|
| 27 |
+
DECAY: 0.998
|
| 28 |
+
EPOCHS: 10
|
| 29 |
+
WARMUP_EPOCHS: 1
|
| 30 |
+
WEIGHT_DECAY: 0.0
|
| 31 |
+
BASE_LR: 0.1 # 512
|
| 32 |
+
WARMUP_LR: .0
|
| 33 |
+
MIN_LR: .0
|
| 34 |
+
LR_LAYER_DECAY: false
|
| 35 |
+
OPTIMIZER:
|
| 36 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_a.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_a'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-a'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
|
| 24 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_r.yaml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
IMG_ON_MEMORY: False
|
| 3 |
+
BATCH_SIZE: 16 # single GPU batch size
|
| 4 |
+
DATASET: 'imagenet_r'
|
| 5 |
+
TRANSFORM: 'build_transform_for_linear_probe'
|
| 6 |
+
DATA_PATH: './data/imagenet-r'
|
| 7 |
+
IMG_SIZE: 448
|
| 8 |
+
MODEL:
|
| 9 |
+
TYPE: intern_vit_6b
|
| 10 |
+
DROP_PATH_RATE: 0.0
|
| 11 |
+
INTERN_VIT_6B:
|
| 12 |
+
FREEZE_VIT: True
|
| 13 |
+
PATCH_SIZE: 14
|
| 14 |
+
PRETRAIN_SIZE: 448
|
| 15 |
+
QKV_BIAS: False
|
| 16 |
+
EMBED_DIM: 3200
|
| 17 |
+
NUM_HEADS: 25
|
| 18 |
+
MLP_RATIO: 4
|
| 19 |
+
INIT_VALUES: 0.1
|
| 20 |
+
QK_NORMALIZATION: True
|
| 21 |
+
DEPTH: 45
|
| 22 |
+
USE_FLASH_ATTN: True
|
| 23 |
+
PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
|
| 24 |
+
CLS_TARGET: 'cls_patch_concat'
|
| 25 |
+
TRAIN:
|
| 26 |
+
EMA:
|
| 27 |
+
ENABLE: True
|
| 28 |
+
DECAY: 0.998
|
| 29 |
+
EPOCHS: 10
|
| 30 |
+
WARMUP_EPOCHS: 1
|
| 31 |
+
WEIGHT_DECAY: 0.0
|
| 32 |
+
BASE_LR: 0.1 # 512
|
| 33 |
+
WARMUP_LR: .0
|
| 34 |
+
MIN_LR: .0
|
| 35 |
+
LR_LAYER_DECAY: false
|
| 36 |
+
OPTIMIZER:
|
| 37 |
+
NAME: 'sgd'
|