momergul commited on
Commit
301879b
·
verified ·
1 Parent(s): 18a2b8d

Upload configuration_git.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. configuration_git.py +164 -0
configuration_git.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022 The HuggingFace Inc. team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
import json
import os
from typing import Union

from transformers.configuration_utils import PretrainedConfig
import transformers.models.git.configuration_git as configuration_git
21
+
22
+
23
# Map from canonical checkpoint identifier to the URL of its hosted config.json.
GIT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "microsoft/git-base": "https://huggingface.co/microsoft/git-base/resolve/main/config.json",
}
26
+
27
+
28
class GitVisionConfig(configuration_git.GitVisionConfig, dict):
    """Vision sub-config for GIT that also behaves as a plain ``dict``.

    Inherits all configuration fields from
    ``transformers.models.git.configuration_git.GitVisionConfig`` and mirrors
    them into the ``dict`` base so the instance can be consumed anywhere a
    mapping is expected (e.g. direct JSON serialization).
    """

    def __init__(self, *args, **kwargs):
        # Initialize the transformers config first so all attributes exist,
        # then snapshot them into the dict base. Explicit base-class calls are
        # used (rather than super()) because of the dual inheritance.
        configuration_git.GitVisionConfig.__init__(self, *args, **kwargs)
        # NOTE(review): this snapshots __dict__ at construction time; later
        # attribute mutations will not be reflected in the mapping view.
        dict.__init__(self, **self.__dict__)

    def toJSON(self):
        """Return a pretty-printed, key-sorted JSON string of this config.

        Fix: ``json`` was referenced here without being imported, which made
        this method raise ``NameError`` — the module-level import resolves it.
        """
        return json.dumps(
            self,
            default=lambda o: o.__dict__,
            sort_keys=True,
            indent=4)
40
+
41
+
42
class GitConfig(PretrainedConfig, dict):
    r"""
    This is the configuration class to store the configuration of a [`GitModel`]. It is used to instantiate a GIT model
    according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the GIT
    [microsoft/git-base](https://huggingface.co/microsoft/git-base) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vision_config (`dict`, *optional*):
            Dictionary of configuration options used to initialize [`GitVisionConfig`].
        vocab_size (`int`, *optional*, defaults to 32778):
            Vocabulary size of the GIT model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`GitModel`].
        hidden_size (`int`, *optional*, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 6):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
        hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
            `"relu"`, `"silu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the attention probabilities.
        max_position_embeddings (`int`, *optional*, defaults to 1024):
            The maximum sequence length that this model might ever be used with. Typically set this to something large
            just in case (e.g., 512 or 1024 or 2048).
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
            The epsilon used by the layer normalization layers.
        position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
            Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For
            positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to
            [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
            For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
            with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models).
        num_image_with_embedding (`int`, *optional*):
            The number of temporal embeddings to add, in case the model is used for video captioning/VQA.

    Examples:

    ```python
    >>> from transformers import GitConfig, GitModel

    >>> # Initializing a GIT microsoft/git-base style configuration
    >>> configuration = GitConfig()

    >>> # Initializing a model (with random weights) from the microsoft/git-base style configuration
    >>> model = GitModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "git"

    def __init__(
        self,
        vision_config=None,
        vocab_size=32778,
        hidden_size=768,
        num_hidden_layers=6,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=1024,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        tie_word_embeddings=True,
        bos_token_id=101,
        eos_token_id=102,
        num_image_with_embedding=None,
        **kwargs,
    ):
        # Explicit base-class call (rather than super()) because of the dual
        # PretrainedConfig/dict inheritance; special token ids are forwarded
        # so PretrainedConfig registers them.
        PretrainedConfig.__init__(
            self,
            bos_token_id=bos_token_id, eos_token_id=eos_token_id, pad_token_id=pad_token_id, **kwargs)

        if vision_config is None:
            # Fall back to the GitVisionConfig defaults when no dict is given.
            vision_config = {}
        self.vision_config = GitVisionConfig(**vision_config)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.position_embedding_type = position_embedding_type
        self.use_cache = use_cache
        self.tie_word_embeddings = tie_word_embeddings
        self.num_image_with_embedding = num_image_with_embedding

        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id

        # Mirror all attributes into the dict base so the config behaves as a
        # plain mapping. NOTE(review): this is a one-time snapshot; attribute
        # mutations made after __init__ are not reflected in the mapping view.
        dict.__init__(self, **self.__dict__)

    def toJSON(self):
        """Return a pretty-printed, key-sorted JSON string of this config.

        Fix: ``json`` was referenced here without being imported, which made
        this method raise ``NameError`` — the module-level import resolves it.
        """
        return json.dumps(
            self,
            default=lambda o: o.__dict__,
            sort_keys=True,
            indent=4)