"""
================================================
@author: Jaron
@time: 2024/08/21 17:51:45
@email: fjjth98@163.com
@description: Configuration class for the Video-CCAM model.
================================================
"""
from typing import Optional

from transformers import PretrainedConfig
|
|
|
|
class VideoCCAMConfig(PretrainedConfig):
    """Configuration for the Video-CCAM multimodal model.

    Stores the paths/identifiers of the three sub-modules (LLM, projector,
    vision encoder) plus the multimodal placeholder tokens and vision-tower
    settings. Registered with ``AutoConfig`` via ``_auto_class``.

    Args:
        llm_name_or_path: Path or hub id of the language model backbone.
        projector_name_or_path: Path or hub id of the vision-to-LLM projector.
        vision_encoder_name_or_path: Path or hub id of the vision encoder.
        image_token: Placeholder token inserted into text for image inputs.
        video_token: Placeholder token inserted into text for video inputs.
        vision_select_layer: Index of the vision-encoder hidden layer whose
            features are used (``-2`` = second-to-last layer).
        vision_max_chunk_size: Maximum number of frames/images processed per
            forward chunk through the vision encoder; ``0`` presumably means
            no chunking — TODO confirm against the model implementation.
        _attn_implementation: Attention backend passed to the sub-modules
            (e.g. ``'flash_attention_2'``, ``'sdpa'``, ``'eager'``).
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = 'videoccam'
    _auto_class = 'AutoConfig'

    def __init__(
        self,
        llm_name_or_path: Optional[str] = None,
        projector_name_or_path: Optional[str] = None,
        vision_encoder_name_or_path: Optional[str] = None,
        image_token: str = '<image>',
        video_token: str = '<video>',
        vision_select_layer: int = -2,
        vision_max_chunk_size: int = 0,
        _attn_implementation: str = 'flash_attention_2',
        **kwargs
    ):
        super().__init__(**kwargs)
        self.llm_name_or_path = llm_name_or_path
        self.projector_name_or_path = projector_name_or_path
        self.vision_encoder_name_or_path = vision_encoder_name_or_path
        self.image_token = image_token
        self.video_token = video_token
        self.vision_select_layer = vision_select_layer
        self.vision_max_chunk_size = vision_max_chunk_size
        self._attn_implementation = _attn_implementation
|
|