File size: 1,701 Bytes
56ef371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Builders for different vision tower backbones (MM encoder visual modules)
from .qwen2_5_vl_encoder import Qwen2_5_VlVisionTower    # Main Qwen2.5 vision tower
from .davit_aux_encoder import DavitVisionTower as DavitVisionTowerAux  # Auxiliary DaViT vision tower

def build_vision_tower(vision_tower_cfg, **kwargs):
    """Construct the main vision tower selected by the model config.

    The tower name is read from ``vision_tower_cfg.mm_vision_tower`` and
    matched case-insensitively against the supported backbones.

    Args:
        vision_tower_cfg: Config object expected to carry an
            ``mm_vision_tower`` attribute. It is also forwarded to the
            tower constructor as ``args``.
        **kwargs: Extra keyword arguments forwarded unchanged to the
            tower constructor.

    Returns:
        A ``Qwen2_5_VlVisionTower`` instance when the tower name contains
        ``"qwen2.5-vl"``.

    Raises:
        ValueError: If ``mm_vision_tower`` is missing, is not a string, or
            does not name a supported tower.
    """
    vision_tower_name = getattr(vision_tower_cfg, 'mm_vision_tower', None)
    print(vision_tower_cfg)  # Debug print of the config being used

    # A missing attribute yields None; the isinstance guard routes that (and
    # any other non-string value) to the ValueError below rather than
    # failing with an AttributeError on `.lower()`.
    if isinstance(vision_tower_name, str) and "qwen2.5-vl" in vision_tower_name.lower():
        return Qwen2_5_VlVisionTower(vision_tower_name, args=vision_tower_cfg, **kwargs)

    # Raise a clear error for unknown towers
    raise ValueError(f'Unknown vision tower: {vision_tower_name}')

def build_vision_tower_aux(vision_tower_cfg, **kwargs):
    """Construct the auxiliary vision tower selected by the model config.

    The tower name is read from ``vision_tower_cfg.mm_vision_tower_aux`` and
    matched case-insensitively against the supported backbones.

    Args:
        vision_tower_cfg: Config object expected to carry an
            ``mm_vision_tower_aux`` attribute. It is also forwarded to the
            tower constructor as ``args``.
        **kwargs: Extra keyword arguments forwarded unchanged to the
            tower constructor.

    Returns:
        A ``DavitVisionTowerAux`` instance when the tower name contains
        ``"davit"``.

    Raises:
        ValueError: If ``mm_vision_tower_aux`` is missing, is not a string,
            or does not name a supported auxiliary tower.
    """
    vision_tower_aux = getattr(vision_tower_cfg, 'mm_vision_tower_aux', None)

    # A missing attribute yields None; the isinstance guard routes that (and
    # any other non-string value) to the ValueError below rather than
    # failing with an AttributeError on `.lower()`.
    if isinstance(vision_tower_aux, str) and 'davit' in vision_tower_aux.lower():
        return DavitVisionTowerAux(vision_tower_aux, args=vision_tower_cfg, **kwargs)

    # Raise a clear error if tower type is unknown
    raise ValueError(f'Unknown aux vision tower: {vision_tower_aux}')