tuandunghcmut committed on
Commit
6be5f84
·
verified ·
1 Parent(s): 2ba2119

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete change set.
Files changed (50) hide show
  1. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu.yaml +35 -0
  2. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_a.yaml +36 -0
  3. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_r.yaml +36 -0
  4. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_real.yaml +36 -0
  5. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_sketch.yaml +36 -0
  6. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenetv2.yaml +36 -0
  7. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_a.yaml +37 -0
  8. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_r.yaml +37 -0
  9. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_real.yaml +37 -0
  10. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_sketch.yaml +37 -0
  11. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenetv2.yaml +37 -0
  12. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu.yaml +36 -0
  13. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_a.yaml +37 -0
  14. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_r.yaml +37 -0
  15. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_sketch.yaml +37 -0
  16. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenetv2.yaml +37 -0
  17. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu.yaml +36 -0
  18. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_a.yaml +37 -0
  19. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_r.yaml +37 -0
  20. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_real.yaml +37 -0
  21. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_sketch.yaml +37 -0
  22. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenetv2.yaml +37 -0
  23. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu.yaml +36 -0
  24. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_a.yaml +37 -0
  25. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_r.yaml +37 -0
  26. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_sketch.yaml +37 -0
  27. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenetv2.yaml +37 -0
  28. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu.yaml +36 -0
  29. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_a.yaml +37 -0
  30. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_r.yaml +37 -0
  31. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_real.yaml +37 -0
  32. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_sketch.yaml +37 -0
  33. VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenetv2.yaml +37 -0
  34. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu.yaml +35 -0
  35. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_r.yaml +36 -0
  36. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_real.yaml +36 -0
  37. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_sketch.yaml +36 -0
  38. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenetv2.yaml +36 -0
  39. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_r.yaml +37 -0
  40. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_real.yaml +37 -0
  41. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_sketch.yaml +37 -0
  42. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenetv2.yaml +37 -0
  43. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu.yaml +36 -0
  44. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_a.yaml +37 -0
  45. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_real.yaml +37 -0
  46. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_sketch.yaml +37 -0
  47. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenetv2.yaml +37 -0
  48. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu.yaml +36 -0
  49. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_a.yaml +37 -0
  50. VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_r.yaml +37 -0
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ TRANSFORM: 'build_transform_for_linear_probe'
5
+ DATA_PATH: './data/imagenet-1k'
6
+ MODEL:
7
+ TYPE: intern_vit_6b
8
+ DROP_PATH_RATE: 0.0
9
+ INTERN_VIT_6B:
10
+ FREEZE_VIT: True
11
+ PATCH_SIZE: 14
12
+ PRETRAIN_SIZE: 224
13
+ QKV_BIAS: False
14
+ EMBED_DIM: 3200
15
+ NUM_HEADS: 25
16
+ MLP_RATIO: 4
17
+ INIT_VALUES: 0.1
18
+ QK_NORMALIZATION: True
19
+ DEPTH: 48
20
+ USE_FLASH_ATTN: True
21
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
22
+ CLS_TARGET: 'attention_pooling'
23
+ TRAIN:
24
+ EMA:
25
+ ENABLE: True
26
+ DECAY: 0.998
27
+ EPOCHS: 10
28
+ WARMUP_EPOCHS: 1
29
+ WEIGHT_DECAY: 0.0
30
+ BASE_LR: 0.1 # 512
31
+ WARMUP_LR: .0
32
+ MIN_LR: .0
33
+ LR_LAYER_DECAY: false
34
+ OPTIMIZER:
35
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_a.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_a'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-a'
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 224
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 48
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
23
+ CLS_TARGET: 'attention_pooling'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_r.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_r'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-r'
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 224
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 48
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
23
+ CLS_TARGET: 'attention_pooling'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_real.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet-real'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-1k'
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 224
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 48
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
23
+ CLS_TARGET: 'attention_pooling'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_sketch.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_sketch'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-sketch'
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 224
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 48
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
23
+ CLS_TARGET: 'attention_pooling'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenetv2.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenetv2'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenetv2'
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 224
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 48
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
23
+ CLS_TARGET: 'attention_pooling'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_a.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_a'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-a'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 224
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 48
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_r.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_r'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-r'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 224
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 48
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_real.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet-real'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-1k'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 224
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 48
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_sketch.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_sketch'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-sketch'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 224
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 48
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenetv2.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenetv2'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenetv2'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 224
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 48
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ TRANSFORM: 'build_transform_for_linear_probe'
5
+ DATA_PATH: './data/imagenet-1k'
6
+ IMG_SIZE: 448
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 448
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 45
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
23
+ CLS_TARGET: 'attention_pooling'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_a.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_a'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-a'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_r.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_r'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-r'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_sketch.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_sketch'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-sketch'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenetv2.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenetv2'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenetv2'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ TRANSFORM: 'build_transform_for_linear_probe'
5
+ DATA_PATH: './data/imagenet-1k'
6
+ IMG_SIZE: 448
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 448
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 45
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
23
+ CLS_TARGET: 'attention_pooling'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_a.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_a'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-a'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_r.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_r'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-r'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_real.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet-real'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-1k'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_sketch.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_sketch'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-sketch'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenetv2.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenetv2'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenetv2'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ TRANSFORM: 'build_transform_for_linear_probe'
5
+ DATA_PATH: './data/imagenet-1k'
6
+ IMG_SIZE: 448
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 448
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 45
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_5.pth"
23
+ CLS_TARGET: 'attention_pooling'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_a.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_a'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-a'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_5.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_r.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_r'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-r'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_5.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_sketch.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_sketch'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-sketch'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_5.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenetv2.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenetv2'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenetv2'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_5.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ TRANSFORM: 'build_transform_for_linear_probe'
5
+ DATA_PATH: './data/imagenet-1k'
6
+ IMG_SIZE: 448
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 448
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 45
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
23
+ CLS_TARGET: 'attention_pooling'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_a.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_a'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-a'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_r.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_r'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-r'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_real.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet-real'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-1k'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_sketch.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_sketch'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-sketch'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/attn_pooling_probing/attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenetv2.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenetv2'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenetv2'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v2_5.pth"
24
+ CLS_TARGET: 'attention_pooling'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ TRANSFORM: 'build_transform_for_linear_probe'
5
+ DATA_PATH: './data/imagenet-1k'
6
+ MODEL:
7
+ TYPE: intern_vit_6b
8
+ DROP_PATH_RATE: 0.0
9
+ INTERN_VIT_6B:
10
+ FREEZE_VIT: True
11
+ PATCH_SIZE: 14
12
+ PRETRAIN_SIZE: 224
13
+ QKV_BIAS: False
14
+ EMBED_DIM: 3200
15
+ NUM_HEADS: 25
16
+ MLP_RATIO: 4
17
+ INIT_VALUES: 0.1
18
+ QK_NORMALIZATION: True
19
+ DEPTH: 48
20
+ USE_FLASH_ATTN: True
21
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
22
+ CLS_TARGET: 'cls_patch_concat'
23
+ TRAIN:
24
+ EMA:
25
+ ENABLE: True
26
+ DECAY: 0.998
27
+ EPOCHS: 10
28
+ WARMUP_EPOCHS: 1
29
+ WEIGHT_DECAY: 0.0
30
+ BASE_LR: 0.1 # 512
31
+ WARMUP_LR: .0
32
+ MIN_LR: .0
33
+ LR_LAYER_DECAY: false
34
+ OPTIMIZER:
35
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_r.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_r'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-r'
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 224
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 48
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
23
+ CLS_TARGET: 'cls_patch_concat'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_real.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet-real'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-1k'
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 224
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 48
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
23
+ CLS_TARGET: 'cls_patch_concat'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_sketch.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_sketch'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-sketch'
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 224
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 48
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
23
+ CLS_TARGET: 'cls_patch_concat'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenetv2.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenetv2'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenetv2'
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 224
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 48
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
23
+ CLS_TARGET: 'cls_patch_concat'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_r.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_r'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-r'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 224
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 48
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
24
+ CLS_TARGET: 'cls_patch_concat'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_real.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet-real'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-1k'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 224
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 48
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
24
+ CLS_TARGET: 'cls_patch_concat'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_sketch.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_sketch'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-sketch'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 224
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 48
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
24
+ CLS_TARGET: 'cls_patch_concat'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenetv2.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenetv2'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenetv2'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 224
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 48
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_224px.pth"
24
+ CLS_TARGET: 'cls_patch_concat'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ TRANSFORM: 'build_transform_for_linear_probe'
5
+ DATA_PATH: './data/imagenet-1k'
6
+ IMG_SIZE: 448
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 448
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 45
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
23
+ CLS_TARGET: 'cls_patch_concat'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_a.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_a'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-a'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
24
+ CLS_TARGET: 'cls_patch_concat'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_real.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet-real'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-1k'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
24
+ CLS_TARGET: 'cls_patch_concat'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_sketch.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_sketch'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-sketch'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
24
+ CLS_TARGET: 'cls_patch_concat'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenetv2.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenetv2'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenetv2'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_0.pth"
24
+ CLS_TARGET: 'cls_patch_concat'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ TRANSFORM: 'build_transform_for_linear_probe'
5
+ DATA_PATH: './data/imagenet-1k'
6
+ IMG_SIZE: 448
7
+ MODEL:
8
+ TYPE: intern_vit_6b
9
+ DROP_PATH_RATE: 0.0
10
+ INTERN_VIT_6B:
11
+ FREEZE_VIT: True
12
+ PATCH_SIZE: 14
13
+ PRETRAIN_SIZE: 448
14
+ QKV_BIAS: False
15
+ EMBED_DIM: 3200
16
+ NUM_HEADS: 25
17
+ MLP_RATIO: 4
18
+ INIT_VALUES: 0.1
19
+ QK_NORMALIZATION: True
20
+ DEPTH: 45
21
+ USE_FLASH_ATTN: True
22
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
23
+ CLS_TARGET: 'cls_patch_concat'
24
+ TRAIN:
25
+ EMA:
26
+ ENABLE: True
27
+ DECAY: 0.998
28
+ EPOCHS: 10
29
+ WARMUP_EPOCHS: 1
30
+ WEIGHT_DECAY: 0.0
31
+ BASE_LR: 0.1 # 512
32
+ WARMUP_LR: .0
33
+ MIN_LR: .0
34
+ LR_LAYER_DECAY: false
35
+ OPTIMIZER:
36
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_a.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_a'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-a'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
24
+ CLS_TARGET: 'cls_patch_concat'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'
VLMEvalKit_old/InternVL/classification/configs/linear_probing/linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_r.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA:
2
+ IMG_ON_MEMORY: False
3
+ BATCH_SIZE: 16 # single GPU batch size
4
+ DATASET: 'imagenet_r'
5
+ TRANSFORM: 'build_transform_for_linear_probe'
6
+ DATA_PATH: './data/imagenet-r'
7
+ IMG_SIZE: 448
8
+ MODEL:
9
+ TYPE: intern_vit_6b
10
+ DROP_PATH_RATE: 0.0
11
+ INTERN_VIT_6B:
12
+ FREEZE_VIT: True
13
+ PATCH_SIZE: 14
14
+ PRETRAIN_SIZE: 448
15
+ QKV_BIAS: False
16
+ EMBED_DIM: 3200
17
+ NUM_HEADS: 25
18
+ MLP_RATIO: 4
19
+ INIT_VALUES: 0.1
20
+ QK_NORMALIZATION: True
21
+ DEPTH: 45
22
+ USE_FLASH_ATTN: True
23
+ PRETRAINED: "./pretrained/intern_vit_6b_448px_v1_2.pth"
24
+ CLS_TARGET: 'cls_patch_concat'
25
+ TRAIN:
26
+ EMA:
27
+ ENABLE: True
28
+ DECAY: 0.998
29
+ EPOCHS: 10
30
+ WARMUP_EPOCHS: 1
31
+ WEIGHT_DECAY: 0.0
32
+ BASE_LR: 0.1 # 512
33
+ WARMUP_LR: .0
34
+ MIN_LR: .0
35
+ LR_LAYER_DECAY: false
36
+ OPTIMIZER:
37
+ NAME: 'sgd'