phonghoccode commited on
Commit
56de355
·
verified ·
1 Parent(s): f19db01

Upload CustomViltForVQA

Browse files
Files changed (2) hide show
  1. config.json +212 -212
  2. model.safetensors +1 -1
config.json CHANGED
@@ -1,234 +1,234 @@
1
  {
2
  "_name_or_path": "dandelin/vilt-b32-finetuned-vqa",
3
  "architectures": [
4
- "ViltForQuestionAnswering"
5
  ],
6
  "attention_probs_dropout_prob": 0.0,
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.0,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "Front",
12
- "1": "Left",
13
- "2": "Below",
14
- "3": "left",
15
- "4": "refrigerator",
16
- "5": "banana",
17
- "6": "stop sign",
18
- "7": "right",
19
- "8": "bottle",
20
- "9": "Red",
21
- "10": "teddy bear",
22
- "11": "bus",
23
- "12": "baseball glove",
24
- "13": "couch",
25
- "14": "apple",
26
- "15": "Orange",
27
- "16": "above",
28
- "17": "vase",
29
- "18": "clock",
30
- "19": "frisbee",
31
- "20": "snowboard",
32
- "21": "spoon",
33
- "22": "dining table",
34
- "23": "laptop",
35
- "24": "Purple",
36
- "25": "Yellow",
37
- "26": "truck",
38
- "27": "cake",
39
- "28": "dog",
40
- "29": "donut",
41
- "30": "No",
42
- "31": "bird",
43
- "32": "handbag",
44
- "33": "sheep",
45
- "34": "orange",
46
  "35": "Right",
47
- "36": "Brown",
48
- "37": "1",
49
- "38": "keyboard",
50
- "39": "sink",
51
- "40": "9",
52
- "41": "7",
53
- "42": "4",
54
- "43": "horse",
55
- "44": "remote",
56
- "45": "traffic light",
57
- "46": "toothbrush",
58
- "47": "bed",
59
- "48": "zebra",
60
- "49": "White",
61
- "50": "tie",
62
- "51": "surfboard",
63
- "52": "elephant",
64
- "53": "bear",
65
- "54": "Above",
66
- "55": "train",
67
  "56": "hot dog",
68
- "57": "skateboard",
69
- "58": "bicycle",
70
- "59": "wine glass",
71
- "60": "Behind",
72
- "61": "3",
73
- "62": "kite",
74
- "63": "knife",
75
- "64": "potted plant",
76
- "65": "cup",
77
- "66": "car",
78
- "67": "Blue",
79
- "68": "8",
80
- "69": "chair",
81
- "70": "Black",
82
- "71": "sports ball",
83
- "72": "cow",
84
- "73": "bench",
85
- "74": "fire hydrant",
86
- "75": "parking meter",
87
- "76": "umbrella",
88
- "77": "0",
89
- "78": "skis",
90
- "79": "person",
91
- "80": "mouse",
92
- "81": "scissors",
93
- "82": "bowl",
94
- "83": "tv",
95
- "84": "cell phone",
96
- "85": "toilet",
97
- "86": "sandwich",
98
- "87": "giraffe",
99
- "88": "2",
100
- "89": "carrot",
101
- "90": "cat",
102
- "91": "tennis racket",
103
- "92": "Grey",
104
- "93": "Yes",
105
- "94": "suitcase",
106
- "95": "broccoli",
107
- "96": "fork",
108
  "97": "Pink",
109
- "98": "motorcycle",
110
- "99": "oven",
111
- "100": "6",
112
- "101": "baseball bat",
113
- "102": "book",
114
- "103": "5",
115
- "104": "pizza",
116
- "105": "boat",
117
- "106": "airplane",
118
- "107": "Green"
119
  },
120
  "image_size": 384,
121
  "initializer_range": 0.02,
122
  "intermediate_size": 3072,
123
  "label2id": {
124
- "0": 77,
125
- "1": 37,
126
- "2": 88,
127
- "3": 61,
128
- "4": 42,
129
- "5": 103,
130
- "6": 100,
131
- "7": 41,
132
- "8": 68,
133
- "9": 40,
134
- "Above": 54,
135
- "Behind": 60,
136
- "Below": 2,
137
- "Black": 70,
138
- "Blue": 67,
139
- "Brown": 36,
140
- "Front": 0,
141
- "Green": 107,
142
- "Grey": 92,
143
- "Left": 1,
144
- "No": 30,
145
- "Orange": 15,
146
  "Pink": 97,
147
- "Purple": 24,
148
- "Red": 9,
149
  "Right": 35,
150
- "White": 49,
151
- "Yellow": 25,
152
- "Yes": 93,
153
- "above": 16,
154
- "airplane": 106,
155
- "apple": 14,
156
- "banana": 5,
157
- "baseball bat": 101,
158
- "baseball glove": 12,
159
- "bear": 53,
160
- "bed": 47,
161
- "bench": 73,
162
- "bicycle": 58,
163
- "bird": 31,
164
- "boat": 105,
165
- "book": 102,
166
- "bottle": 8,
167
- "bowl": 82,
168
- "broccoli": 95,
169
- "bus": 11,
170
- "cake": 27,
171
- "car": 66,
172
- "carrot": 89,
173
- "cat": 90,
174
- "cell phone": 84,
175
- "chair": 69,
176
- "clock": 18,
177
- "couch": 13,
178
- "cow": 72,
179
- "cup": 65,
180
- "dining table": 22,
181
- "dog": 28,
182
- "donut": 29,
183
- "elephant": 52,
184
- "fire hydrant": 74,
185
- "fork": 96,
186
- "frisbee": 19,
187
- "giraffe": 87,
188
- "handbag": 32,
189
- "horse": 43,
190
  "hot dog": 56,
191
- "keyboard": 38,
192
- "kite": 62,
193
- "knife": 63,
194
- "laptop": 23,
195
- "left": 3,
196
- "motorcycle": 98,
197
- "mouse": 80,
198
- "orange": 34,
199
- "oven": 99,
200
- "parking meter": 75,
201
- "person": 79,
202
- "pizza": 104,
203
- "potted plant": 64,
204
- "refrigerator": 4,
205
- "remote": 44,
206
- "right": 7,
207
- "sandwich": 86,
208
- "scissors": 81,
209
- "sheep": 33,
210
- "sink": 39,
211
- "skateboard": 57,
212
- "skis": 78,
213
- "snowboard": 20,
214
- "spoon": 21,
215
- "sports ball": 71,
216
- "stop sign": 6,
217
- "suitcase": 94,
218
- "surfboard": 51,
219
- "teddy bear": 10,
220
- "tennis racket": 91,
221
- "tie": 50,
222
- "toilet": 85,
223
- "toothbrush": 46,
224
- "traffic light": 45,
225
- "train": 55,
226
- "truck": 26,
227
- "tv": 83,
228
- "umbrella": 76,
229
- "vase": 17,
230
- "wine glass": 59,
231
- "zebra": 48
232
  },
233
  "layer_norm_eps": 1e-12,
234
  "max_image_length": -1,
@@ -243,7 +243,7 @@
243
  "qkv_bias": true,
244
  "tie_word_embeddings": false,
245
  "torch_dtype": "float32",
246
- "transformers_version": "4.47.0",
247
  "type_vocab_size": 2,
248
  "vocab_size": 30522
249
  }
 
1
  {
2
  "_name_or_path": "dandelin/vilt-b32-finetuned-vqa",
3
  "architectures": [
4
+ "CustomViltForVQA"
5
  ],
6
  "attention_probs_dropout_prob": 0.0,
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.0,
9
  "hidden_size": 768,
10
  "id2label": {
11
+ "0": "boat",
12
+ "1": "above",
13
+ "2": "apple",
14
+ "3": "clock",
15
+ "4": "9",
16
+ "5": "wine glass",
17
+ "6": "mouse",
18
+ "7": "2",
19
+ "8": "sheep",
20
+ "9": "couch",
21
+ "10": "snowboard",
22
+ "11": "Yes",
23
+ "12": "spoon",
24
+ "13": "sports ball",
25
+ "14": "handbag",
26
+ "15": "Blue",
27
+ "16": "5",
28
+ "17": "laptop",
29
+ "18": "toothbrush",
30
+ "19": "carrot",
31
+ "20": "cake",
32
+ "21": "dog",
33
+ "22": "White",
34
+ "23": "motorcycle",
35
+ "24": "chair",
36
+ "25": "train",
37
+ "26": "zebra",
38
+ "27": "6",
39
+ "28": "person",
40
+ "29": "parking meter",
41
+ "30": "Brown",
42
+ "31": "car",
43
+ "32": "sink",
44
+ "33": "right",
45
+ "34": "bear",
46
  "35": "Right",
47
+ "36": "sandwich",
48
+ "37": "Grey",
49
+ "38": "Red",
50
+ "39": "surfboard",
51
+ "40": "bowl",
52
+ "41": "airplane",
53
+ "42": "potted plant",
54
+ "43": "7",
55
+ "44": "tv",
56
+ "45": "suitcase",
57
+ "46": "kite",
58
+ "47": "tennis racket",
59
+ "48": "bench",
60
+ "49": "0",
61
+ "50": "bed",
62
+ "51": "dining table",
63
+ "52": "No",
64
+ "53": "refrigerator",
65
+ "54": "giraffe",
66
+ "55": "Purple",
67
  "56": "hot dog",
68
+ "57": "truck",
69
+ "58": "vase",
70
+ "59": "Orange",
71
+ "60": "tie",
72
+ "61": "broccoli",
73
+ "62": "umbrella",
74
+ "63": "Green",
75
+ "64": "left",
76
+ "65": "stop sign",
77
+ "66": "cat",
78
+ "67": "teddy bear",
79
+ "68": "bicycle",
80
+ "69": "orange",
81
+ "70": "3",
82
+ "71": "scissors",
83
+ "72": "baseball glove",
84
+ "73": "frisbee",
85
+ "74": "4",
86
+ "75": "keyboard",
87
+ "76": "banana",
88
+ "77": "bus",
89
+ "78": "1",
90
+ "79": "Black",
91
+ "80": "8",
92
+ "81": "fork",
93
+ "82": "baseball bat",
94
+ "83": "donut",
95
+ "84": "book",
96
+ "85": "Above",
97
+ "86": "skis",
98
+ "87": "oven",
99
+ "88": "Below",
100
+ "89": "cell phone",
101
+ "90": "Left",
102
+ "91": "fire hydrant",
103
+ "92": "Behind",
104
+ "93": "cup",
105
+ "94": "elephant",
106
+ "95": "pizza",
107
+ "96": "bird",
108
  "97": "Pink",
109
+ "98": "knife",
110
+ "99": "skateboard",
111
+ "100": "horse",
112
+ "101": "Front",
113
+ "102": "Yellow",
114
+ "103": "traffic light",
115
+ "104": "remote",
116
+ "105": "cow",
117
+ "106": "toilet",
118
+ "107": "bottle"
119
  },
120
  "image_size": 384,
121
  "initializer_range": 0.02,
122
  "intermediate_size": 3072,
123
  "label2id": {
124
+ "0": 49,
125
+ "1": 78,
126
+ "2": 7,
127
+ "3": 70,
128
+ "4": 74,
129
+ "5": 16,
130
+ "6": 27,
131
+ "7": 43,
132
+ "8": 80,
133
+ "9": 4,
134
+ "Above": 85,
135
+ "Behind": 92,
136
+ "Below": 88,
137
+ "Black": 79,
138
+ "Blue": 15,
139
+ "Brown": 30,
140
+ "Front": 101,
141
+ "Green": 63,
142
+ "Grey": 37,
143
+ "Left": 90,
144
+ "No": 52,
145
+ "Orange": 59,
146
  "Pink": 97,
147
+ "Purple": 55,
148
+ "Red": 38,
149
  "Right": 35,
150
+ "White": 22,
151
+ "Yellow": 102,
152
+ "Yes": 11,
153
+ "above": 1,
154
+ "airplane": 41,
155
+ "apple": 2,
156
+ "banana": 76,
157
+ "baseball bat": 82,
158
+ "baseball glove": 72,
159
+ "bear": 34,
160
+ "bed": 50,
161
+ "bench": 48,
162
+ "bicycle": 68,
163
+ "bird": 96,
164
+ "boat": 0,
165
+ "book": 84,
166
+ "bottle": 107,
167
+ "bowl": 40,
168
+ "broccoli": 61,
169
+ "bus": 77,
170
+ "cake": 20,
171
+ "car": 31,
172
+ "carrot": 19,
173
+ "cat": 66,
174
+ "cell phone": 89,
175
+ "chair": 24,
176
+ "clock": 3,
177
+ "couch": 9,
178
+ "cow": 105,
179
+ "cup": 93,
180
+ "dining table": 51,
181
+ "dog": 21,
182
+ "donut": 83,
183
+ "elephant": 94,
184
+ "fire hydrant": 91,
185
+ "fork": 81,
186
+ "frisbee": 73,
187
+ "giraffe": 54,
188
+ "handbag": 14,
189
+ "horse": 100,
190
  "hot dog": 56,
191
+ "keyboard": 75,
192
+ "kite": 46,
193
+ "knife": 98,
194
+ "laptop": 17,
195
+ "left": 64,
196
+ "motorcycle": 23,
197
+ "mouse": 6,
198
+ "orange": 69,
199
+ "oven": 87,
200
+ "parking meter": 29,
201
+ "person": 28,
202
+ "pizza": 95,
203
+ "potted plant": 42,
204
+ "refrigerator": 53,
205
+ "remote": 104,
206
+ "right": 33,
207
+ "sandwich": 36,
208
+ "scissors": 71,
209
+ "sheep": 8,
210
+ "sink": 32,
211
+ "skateboard": 99,
212
+ "skis": 86,
213
+ "snowboard": 10,
214
+ "spoon": 12,
215
+ "sports ball": 13,
216
+ "stop sign": 65,
217
+ "suitcase": 45,
218
+ "surfboard": 39,
219
+ "teddy bear": 67,
220
+ "tennis racket": 47,
221
+ "tie": 60,
222
+ "toilet": 106,
223
+ "toothbrush": 18,
224
+ "traffic light": 103,
225
+ "train": 25,
226
+ "truck": 57,
227
+ "tv": 44,
228
+ "umbrella": 62,
229
+ "vase": 58,
230
+ "wine glass": 5,
231
+ "zebra": 26
232
  },
233
  "layer_norm_eps": 1e-12,
234
  "max_image_length": -1,
 
243
  "qkv_bias": true,
244
  "tie_word_embeddings": false,
245
  "torch_dtype": "float32",
246
+ "transformers_version": "4.35.2",
247
  "type_vocab_size": 2,
248
  "vocab_size": 30522
249
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed63c524be59be652b00f1cf6d13adf15695129ee961964d7200f1bcd5c8a822
3
  size 451805856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6b0c4270fa9bd8aff01435a16088e82bf69fe503393723e7aa34ce09f5d80b1
3
  size 451805856