phonghoccode commited on
Commit
813a487
·
verified ·
1 Parent(s): f95eca8

Upload CustomViltForVQA

Browse files
Files changed (2) hide show
  1. config.json +217 -217
  2. model.safetensors +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "dandelin/vilt-b32-finetuned-vqa",
3
  "architectures": [
4
  "CustomViltForVQA"
5
  ],
@@ -8,227 +8,227 @@
8
  "hidden_dropout_prob": 0.0,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "boat",
12
- "1": "above",
13
- "2": "apple",
14
- "3": "clock",
15
- "4": "9",
16
- "5": "wine glass",
17
- "6": "mouse",
18
- "7": "2",
19
- "8": "sheep",
20
- "9": "couch",
21
- "10": "snowboard",
22
- "11": "Yes",
23
- "12": "spoon",
24
- "13": "sports ball",
25
- "14": "handbag",
26
- "15": "Blue",
27
- "16": "5",
28
- "17": "laptop",
29
- "18": "toothbrush",
30
- "19": "carrot",
31
- "20": "cake",
32
- "21": "dog",
33
- "22": "White",
34
- "23": "motorcycle",
35
- "24": "chair",
36
- "25": "train",
37
- "26": "zebra",
38
- "27": "6",
39
- "28": "person",
40
- "29": "parking meter",
41
- "30": "Brown",
42
- "31": "car",
43
- "32": "sink",
44
- "33": "right",
45
- "34": "bear",
46
- "35": "Right",
47
- "36": "sandwich",
48
- "37": "Grey",
49
- "38": "Red",
50
- "39": "surfboard",
51
- "40": "bowl",
52
- "41": "airplane",
53
- "42": "potted plant",
54
- "43": "7",
55
- "44": "tv",
56
- "45": "suitcase",
57
- "46": "kite",
58
- "47": "tennis racket",
59
- "48": "bench",
60
- "49": "0",
61
- "50": "bed",
62
- "51": "dining table",
63
- "52": "No",
64
- "53": "refrigerator",
65
- "54": "giraffe",
66
- "55": "Purple",
67
- "56": "hot dog",
68
- "57": "truck",
69
- "58": "vase",
70
- "59": "Orange",
71
- "60": "tie",
72
- "61": "broccoli",
73
- "62": "umbrella",
74
- "63": "Green",
75
- "64": "left",
76
- "65": "stop sign",
77
- "66": "cat",
78
- "67": "teddy bear",
79
- "68": "bicycle",
80
- "69": "orange",
81
- "70": "3",
82
- "71": "scissors",
83
- "72": "baseball glove",
84
- "73": "frisbee",
85
- "74": "4",
86
- "75": "keyboard",
87
- "76": "banana",
88
- "77": "bus",
89
- "78": "1",
90
- "79": "Black",
91
- "80": "8",
92
- "81": "fork",
93
- "82": "baseball bat",
94
- "83": "donut",
95
- "84": "book",
96
- "85": "Above",
97
- "86": "skis",
98
- "87": "oven",
99
- "88": "Below",
100
- "89": "cell phone",
101
- "90": "Left",
102
- "91": "fire hydrant",
103
- "92": "Behind",
104
- "93": "cup",
105
- "94": "elephant",
106
- "95": "pizza",
107
- "96": "bird",
108
- "97": "Pink",
109
- "98": "knife",
110
- "99": "skateboard",
111
- "100": "horse",
112
- "101": "Front",
113
- "102": "Yellow",
114
- "103": "traffic light",
115
- "104": "remote",
116
- "105": "cow",
117
- "106": "toilet",
118
- "107": "bottle"
119
  },
120
  "image_size": 384,
121
  "initializer_range": 0.02,
122
  "intermediate_size": 3072,
123
  "label2id": {
124
- "0": 49,
125
- "1": 78,
126
- "2": 7,
127
- "3": 70,
128
- "4": 74,
129
- "5": 16,
130
- "6": 27,
131
- "7": 43,
132
- "8": 80,
133
- "9": 4,
134
- "Above": 85,
135
- "Behind": 92,
136
- "Below": 88,
137
- "Black": 79,
138
- "Blue": 15,
139
- "Brown": 30,
140
- "Front": 101,
141
- "Green": 63,
142
- "Grey": 37,
143
- "Left": 90,
144
- "No": 52,
145
- "Orange": 59,
146
- "Pink": 97,
147
- "Purple": 55,
148
- "Red": 38,
149
- "Right": 35,
150
- "White": 22,
151
- "Yellow": 102,
152
- "Yes": 11,
153
- "above": 1,
154
- "airplane": 41,
155
- "apple": 2,
156
- "banana": 76,
157
- "baseball bat": 82,
158
- "baseball glove": 72,
159
- "bear": 34,
160
- "bed": 50,
161
- "bench": 48,
162
- "bicycle": 68,
163
- "bird": 96,
164
- "boat": 0,
165
- "book": 84,
166
- "bottle": 107,
167
- "bowl": 40,
168
- "broccoli": 61,
169
- "bus": 77,
170
- "cake": 20,
171
- "car": 31,
172
- "carrot": 19,
173
- "cat": 66,
174
- "cell phone": 89,
175
- "chair": 24,
176
- "clock": 3,
177
- "couch": 9,
178
- "cow": 105,
179
- "cup": 93,
180
- "dining table": 51,
181
- "dog": 21,
182
- "donut": 83,
183
- "elephant": 94,
184
- "fire hydrant": 91,
185
- "fork": 81,
186
- "frisbee": 73,
187
- "giraffe": 54,
188
- "handbag": 14,
189
- "horse": 100,
190
- "hot dog": 56,
191
- "keyboard": 75,
192
- "kite": 46,
193
- "knife": 98,
194
- "laptop": 17,
195
- "left": 64,
196
- "motorcycle": 23,
197
- "mouse": 6,
198
- "orange": 69,
199
- "oven": 87,
200
- "parking meter": 29,
201
- "person": 28,
202
- "pizza": 95,
203
- "potted plant": 42,
204
- "refrigerator": 53,
205
- "remote": 104,
206
- "right": 33,
207
- "sandwich": 36,
208
- "scissors": 71,
209
- "sheep": 8,
210
- "sink": 32,
211
- "skateboard": 99,
212
- "skis": 86,
213
- "snowboard": 10,
214
- "spoon": 12,
215
- "sports ball": 13,
216
- "stop sign": 65,
217
- "suitcase": 45,
218
- "surfboard": 39,
219
- "teddy bear": 67,
220
- "tennis racket": 47,
221
- "tie": 60,
222
- "toilet": 106,
223
- "toothbrush": 18,
224
- "traffic light": 103,
225
- "train": 25,
226
- "truck": 57,
227
- "tv": 44,
228
- "umbrella": 62,
229
- "vase": 58,
230
- "wine glass": 5,
231
- "zebra": 26
232
  },
233
  "layer_norm_eps": 1e-12,
234
  "max_image_length": -1,
 
1
  {
2
+ "_name_or_path": "phonghoccode/vilt-vqa-finetune",
3
  "architectures": [
4
  "CustomViltForVQA"
5
  ],
 
8
  "hidden_dropout_prob": 0.0,
9
  "hidden_size": 768,
10
  "id2label": {
11
+ "0": "sink",
12
+ "1": "zebra",
13
+ "2": "Right",
14
+ "3": "toothbrush",
15
+ "4": "hot dog",
16
+ "5": "traffic light",
17
+ "6": "chair",
18
+ "7": "mouse",
19
+ "8": "bicycle",
20
+ "9": "2",
21
+ "10": "bus",
22
+ "11": "fork",
23
+ "12": "carrot",
24
+ "13": "car",
25
+ "14": "Blue",
26
+ "15": "pizza",
27
+ "16": "9",
28
+ "17": "bear",
29
+ "18": "banana",
30
+ "19": "kite",
31
+ "20": "6",
32
+ "21": "dining table",
33
+ "22": "above",
34
+ "23": "sheep",
35
+ "24": "truck",
36
+ "25": "5",
37
+ "26": "8",
38
+ "27": "Left",
39
+ "28": "0",
40
+ "29": "oven",
41
+ "30": "apple",
42
+ "31": "handbag",
43
+ "32": "cup",
44
+ "33": "umbrella",
45
+ "34": "couch",
46
+ "35": "stop sign",
47
+ "36": "Above",
48
+ "37": "refrigerator",
49
+ "38": "giraffe",
50
+ "39": "train",
51
+ "40": "remote",
52
+ "41": "Below",
53
+ "42": "cake",
54
+ "43": "3",
55
+ "44": "clock",
56
+ "45": "bottle",
57
+ "46": "tie",
58
+ "47": "knife",
59
+ "48": "dog",
60
+ "49": "White",
61
+ "50": "skateboard",
62
+ "51": "sandwich",
63
+ "52": "Front",
64
+ "53": "cat",
65
+ "54": "suitcase",
66
+ "55": "snowboard",
67
+ "56": "broccoli",
68
+ "57": "tv",
69
+ "58": "elephant",
70
+ "59": "tennis racket",
71
+ "60": "baseball glove",
72
+ "61": "motorcycle",
73
+ "62": "right",
74
+ "63": "bird",
75
+ "64": "Brown",
76
+ "65": "airplane",
77
+ "66": "1",
78
+ "67": "bench",
79
+ "68": "boat",
80
+ "69": "No",
81
+ "70": "book",
82
+ "71": "Orange",
83
+ "72": "donut",
84
+ "73": "Red",
85
+ "74": "Behind",
86
+ "75": "laptop",
87
+ "76": "orange",
88
+ "77": "teddy bear",
89
+ "78": "Black",
90
+ "79": "person",
91
+ "80": "surfboard",
92
+ "81": "7",
93
+ "82": "4",
94
+ "83": "Green",
95
+ "84": "scissors",
96
+ "85": "Grey",
97
+ "86": "left",
98
+ "87": "horse",
99
+ "88": "vase",
100
+ "89": "potted plant",
101
+ "90": "sports ball",
102
+ "91": "Purple",
103
+ "92": "spoon",
104
+ "93": "parking meter",
105
+ "94": "keyboard",
106
+ "95": "cow",
107
+ "96": "wine glass",
108
+ "97": "fire hydrant",
109
+ "98": "bed",
110
+ "99": "baseball bat",
111
+ "100": "skis",
112
+ "101": "Yes",
113
+ "102": "frisbee",
114
+ "103": "toilet",
115
+ "104": "Pink",
116
+ "105": "Yellow",
117
+ "106": "bowl",
118
+ "107": "cell phone"
119
  },
120
  "image_size": 384,
121
  "initializer_range": 0.02,
122
  "intermediate_size": 3072,
123
  "label2id": {
124
+ "0": 28,
125
+ "1": 66,
126
+ "2": 9,
127
+ "3": 43,
128
+ "4": 82,
129
+ "5": 25,
130
+ "6": 20,
131
+ "7": 81,
132
+ "8": 26,
133
+ "9": 16,
134
+ "Above": 36,
135
+ "Behind": 74,
136
+ "Below": 41,
137
+ "Black": 78,
138
+ "Blue": 14,
139
+ "Brown": 64,
140
+ "Front": 52,
141
+ "Green": 83,
142
+ "Grey": 85,
143
+ "Left": 27,
144
+ "No": 69,
145
+ "Orange": 71,
146
+ "Pink": 104,
147
+ "Purple": 91,
148
+ "Red": 73,
149
+ "Right": 2,
150
+ "White": 49,
151
+ "Yellow": 105,
152
+ "Yes": 101,
153
+ "above": 22,
154
+ "airplane": 65,
155
+ "apple": 30,
156
+ "banana": 18,
157
+ "baseball bat": 99,
158
+ "baseball glove": 60,
159
+ "bear": 17,
160
+ "bed": 98,
161
+ "bench": 67,
162
+ "bicycle": 8,
163
+ "bird": 63,
164
+ "boat": 68,
165
+ "book": 70,
166
+ "bottle": 45,
167
+ "bowl": 106,
168
+ "broccoli": 56,
169
+ "bus": 10,
170
+ "cake": 42,
171
+ "car": 13,
172
+ "carrot": 12,
173
+ "cat": 53,
174
+ "cell phone": 107,
175
+ "chair": 6,
176
+ "clock": 44,
177
+ "couch": 34,
178
+ "cow": 95,
179
+ "cup": 32,
180
+ "dining table": 21,
181
+ "dog": 48,
182
+ "donut": 72,
183
+ "elephant": 58,
184
+ "fire hydrant": 97,
185
+ "fork": 11,
186
+ "frisbee": 102,
187
+ "giraffe": 38,
188
+ "handbag": 31,
189
+ "horse": 87,
190
+ "hot dog": 4,
191
+ "keyboard": 94,
192
+ "kite": 19,
193
+ "knife": 47,
194
+ "laptop": 75,
195
+ "left": 86,
196
+ "motorcycle": 61,
197
+ "mouse": 7,
198
+ "orange": 76,
199
+ "oven": 29,
200
+ "parking meter": 93,
201
+ "person": 79,
202
+ "pizza": 15,
203
+ "potted plant": 89,
204
+ "refrigerator": 37,
205
+ "remote": 40,
206
+ "right": 62,
207
+ "sandwich": 51,
208
+ "scissors": 84,
209
+ "sheep": 23,
210
+ "sink": 0,
211
+ "skateboard": 50,
212
+ "skis": 100,
213
+ "snowboard": 55,
214
+ "spoon": 92,
215
+ "sports ball": 90,
216
+ "stop sign": 35,
217
+ "suitcase": 54,
218
+ "surfboard": 80,
219
+ "teddy bear": 77,
220
+ "tennis racket": 59,
221
+ "tie": 46,
222
+ "toilet": 103,
223
+ "toothbrush": 3,
224
+ "traffic light": 5,
225
+ "train": 39,
226
+ "truck": 24,
227
+ "tv": 57,
228
+ "umbrella": 33,
229
+ "vase": 88,
230
+ "wine glass": 96,
231
+ "zebra": 1
232
  },
233
  "layer_norm_eps": 1e-12,
234
  "max_image_length": -1,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9286befde0cc5eeffadbf936aad99ed37d7ca481f2b7755ace4122b99f138f5
3
  size 451805856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69cbf36c74dfb571bf45dbc627923b779ab169d1b4761396c5022df7a4e3325d
3
  size 451805856