phonghoccode committed on
Commit
aa14ac6
·
verified ·
1 Parent(s): 3466bfa

Upload CustomViltForVQA

Browse files
Files changed (2) hide show
  1. config.json +213 -213
  2. model.safetensors +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "phonghoccode/vilt-vqa-finetune",
3
  "architectures": [
4
  "CustomViltForVQA"
5
  ],
@@ -8,227 +8,227 @@
8
  "hidden_dropout_prob": 0.0,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "sink",
12
- "1": "zebra",
13
- "2": "Right",
14
- "3": "toothbrush",
15
- "4": "hot dog",
16
- "5": "traffic light",
17
- "6": "chair",
18
- "7": "mouse",
19
- "8": "bicycle",
20
- "9": "2",
21
- "10": "bus",
22
- "11": "fork",
23
- "12": "carrot",
24
- "13": "car",
25
- "14": "Blue",
26
- "15": "pizza",
27
- "16": "9",
28
- "17": "bear",
29
- "18": "banana",
30
- "19": "kite",
31
- "20": "6",
32
- "21": "dining table",
33
- "22": "above",
34
- "23": "sheep",
35
- "24": "truck",
36
- "25": "5",
37
- "26": "8",
38
- "27": "Left",
39
- "28": "0",
40
- "29": "oven",
41
- "30": "apple",
42
- "31": "handbag",
43
- "32": "cup",
44
- "33": "umbrella",
45
- "34": "couch",
46
- "35": "stop sign",
47
- "36": "Above",
48
- "37": "refrigerator",
49
- "38": "giraffe",
50
- "39": "train",
51
- "40": "remote",
52
- "41": "Below",
53
- "42": "cake",
54
- "43": "3",
55
- "44": "clock",
56
- "45": "bottle",
57
- "46": "tie",
58
- "47": "knife",
59
- "48": "dog",
60
- "49": "White",
61
- "50": "skateboard",
62
- "51": "sandwich",
63
  "52": "Front",
64
- "53": "cat",
65
- "54": "suitcase",
66
- "55": "snowboard",
67
- "56": "broccoli",
68
- "57": "tv",
69
- "58": "elephant",
70
- "59": "tennis racket",
71
- "60": "baseball glove",
72
- "61": "motorcycle",
73
- "62": "right",
74
- "63": "bird",
75
  "64": "Brown",
76
- "65": "airplane",
77
- "66": "1",
78
- "67": "bench",
79
- "68": "boat",
80
- "69": "No",
81
- "70": "book",
82
- "71": "Orange",
83
- "72": "donut",
84
- "73": "Red",
85
- "74": "Behind",
86
- "75": "laptop",
87
- "76": "orange",
88
- "77": "teddy bear",
89
- "78": "Black",
90
- "79": "person",
91
- "80": "surfboard",
92
- "81": "7",
93
- "82": "4",
94
- "83": "Green",
95
- "84": "scissors",
96
- "85": "Grey",
97
- "86": "left",
98
- "87": "horse",
99
- "88": "vase",
100
- "89": "potted plant",
101
- "90": "sports ball",
102
- "91": "Purple",
103
- "92": "spoon",
104
- "93": "parking meter",
105
- "94": "keyboard",
106
- "95": "cow",
107
- "96": "wine glass",
108
- "97": "fire hydrant",
109
- "98": "bed",
110
- "99": "baseball bat",
111
- "100": "skis",
112
- "101": "Yes",
113
- "102": "frisbee",
114
- "103": "toilet",
115
- "104": "Pink",
116
- "105": "Yellow",
117
- "106": "bowl",
118
- "107": "cell phone"
119
  },
120
  "image_size": 384,
121
  "initializer_range": 0.02,
122
  "intermediate_size": 3072,
123
  "label2id": {
124
- "0": 28,
125
- "1": 66,
126
- "2": 9,
127
- "3": 43,
128
- "4": 82,
129
- "5": 25,
130
- "6": 20,
131
- "7": 81,
132
- "8": 26,
133
- "9": 16,
134
- "Above": 36,
135
- "Behind": 74,
136
- "Below": 41,
137
- "Black": 78,
138
- "Blue": 14,
139
  "Brown": 64,
140
  "Front": 52,
141
- "Green": 83,
142
- "Grey": 85,
143
- "Left": 27,
144
- "No": 69,
145
- "Orange": 71,
146
- "Pink": 104,
147
- "Purple": 91,
148
- "Red": 73,
149
- "Right": 2,
150
- "White": 49,
151
- "Yellow": 105,
152
- "Yes": 101,
153
- "above": 22,
154
- "airplane": 65,
155
- "apple": 30,
156
- "banana": 18,
157
- "baseball bat": 99,
158
- "baseball glove": 60,
159
- "bear": 17,
160
- "bed": 98,
161
- "bench": 67,
162
- "bicycle": 8,
163
- "bird": 63,
164
- "boat": 68,
165
- "book": 70,
166
- "bottle": 45,
167
- "bowl": 106,
168
- "broccoli": 56,
169
- "bus": 10,
170
- "cake": 42,
171
- "car": 13,
172
- "carrot": 12,
173
- "cat": 53,
174
- "cell phone": 107,
175
- "chair": 6,
176
- "clock": 44,
177
- "couch": 34,
178
- "cow": 95,
179
- "cup": 32,
180
- "dining table": 21,
181
- "dog": 48,
182
- "donut": 72,
183
- "elephant": 58,
184
- "fire hydrant": 97,
185
- "fork": 11,
186
- "frisbee": 102,
187
- "giraffe": 38,
188
- "handbag": 31,
189
- "horse": 87,
190
- "hot dog": 4,
191
- "keyboard": 94,
192
- "kite": 19,
193
- "knife": 47,
194
- "laptop": 75,
195
- "left": 86,
196
- "motorcycle": 61,
197
- "mouse": 7,
198
- "orange": 76,
199
- "oven": 29,
200
- "parking meter": 93,
201
- "person": 79,
202
- "pizza": 15,
203
- "potted plant": 89,
204
- "refrigerator": 37,
205
- "remote": 40,
206
- "right": 62,
207
- "sandwich": 51,
208
- "scissors": 84,
209
- "sheep": 23,
210
- "sink": 0,
211
- "skateboard": 50,
212
- "skis": 100,
213
- "snowboard": 55,
214
- "spoon": 92,
215
- "sports ball": 90,
216
- "stop sign": 35,
217
- "suitcase": 54,
218
- "surfboard": 80,
219
- "teddy bear": 77,
220
- "tennis racket": 59,
221
- "tie": 46,
222
- "toilet": 103,
223
- "toothbrush": 3,
224
- "traffic light": 5,
225
- "train": 39,
226
- "truck": 24,
227
- "tv": 57,
228
- "umbrella": 33,
229
- "vase": 88,
230
- "wine glass": 96,
231
- "zebra": 1
232
  },
233
  "layer_norm_eps": 1e-12,
234
  "max_image_length": -1,
 
1
  {
2
+ "_name_or_path": "phonghoccode/vilt-vqa-finetune-pytorch",
3
  "architectures": [
4
  "CustomViltForVQA"
5
  ],
 
8
  "hidden_dropout_prob": 0.0,
9
  "hidden_size": 768,
10
  "id2label": {
11
+ "0": "donut",
12
+ "1": "Orange",
13
+ "2": "bottle",
14
+ "3": "laptop",
15
+ "4": "toilet",
16
+ "5": "car",
17
+ "6": "fork",
18
+ "7": "bus",
19
+ "8": "keyboard",
20
+ "9": "0",
21
+ "10": "Green",
22
+ "11": "vase",
23
+ "12": "bear",
24
+ "13": "4",
25
+ "14": "3",
26
+ "15": "bird",
27
+ "16": "Yellow",
28
+ "17": "Grey",
29
+ "18": "suitcase",
30
+ "19": "skateboard",
31
+ "20": "tv",
32
+ "21": "Red",
33
+ "22": "Behind",
34
+ "23": "spoon",
35
+ "24": "bicycle",
36
+ "25": "7",
37
+ "26": "remote",
38
+ "27": "kite",
39
+ "28": "orange",
40
+ "29": "cow",
41
+ "30": "Below",
42
+ "31": "parking meter",
43
+ "32": "right",
44
+ "33": "bowl",
45
+ "34": "sheep",
46
+ "35": "handbag",
47
+ "36": "potted plant",
48
+ "37": "left",
49
+ "38": "airplane",
50
+ "39": "2",
51
+ "40": "6",
52
+ "41": "elephant",
53
+ "42": "skis",
54
+ "43": "bench",
55
+ "44": "dog",
56
+ "45": "truck",
57
+ "46": "Left",
58
+ "47": "umbrella",
59
+ "48": "motorcycle",
60
+ "49": "5",
61
+ "50": "tennis racket",
62
+ "51": "cake",
63
  "52": "Front",
64
+ "53": "clock",
65
+ "54": "teddy bear",
66
+ "55": "hot dog",
67
+ "56": "oven",
68
+ "57": "toothbrush",
69
+ "58": "Black",
70
+ "59": "book",
71
+ "60": "1",
72
+ "61": "tie",
73
+ "62": "couch",
74
+ "63": "mouse",
75
  "64": "Brown",
76
+ "65": "dining table",
77
+ "66": "Pink",
78
+ "67": "carrot",
79
+ "68": "surfboard",
80
+ "69": "pizza",
81
+ "70": "bed",
82
+ "71": "cell phone",
83
+ "72": "broccoli",
84
+ "73": "scissors",
85
+ "74": "Purple",
86
+ "75": "boat",
87
+ "76": "Yes",
88
+ "77": "apple",
89
+ "78": "Blue",
90
+ "79": "stop sign",
91
+ "80": "8",
92
+ "81": "frisbee",
93
+ "82": "sports ball",
94
+ "83": "9",
95
+ "84": "fire hydrant",
96
+ "85": "wine glass",
97
+ "86": "sink",
98
+ "87": "baseball glove",
99
+ "88": "cat",
100
+ "89": "train",
101
+ "90": "banana",
102
+ "91": "horse",
103
+ "92": "above",
104
+ "93": "White",
105
+ "94": "traffic light",
106
+ "95": "snowboard",
107
+ "96": "No",
108
+ "97": "baseball bat",
109
+ "98": "person",
110
+ "99": "refrigerator",
111
+ "100": "zebra",
112
+ "101": "chair",
113
+ "102": "cup",
114
+ "103": "giraffe",
115
+ "104": "knife",
116
+ "105": "Right",
117
+ "106": "Above",
118
+ "107": "sandwich"
119
  },
120
  "image_size": 384,
121
  "initializer_range": 0.02,
122
  "intermediate_size": 3072,
123
  "label2id": {
124
+ "0": 9,
125
+ "1": 60,
126
+ "2": 39,
127
+ "3": 14,
128
+ "4": 13,
129
+ "5": 49,
130
+ "6": 40,
131
+ "7": 25,
132
+ "8": 80,
133
+ "9": 83,
134
+ "Above": 106,
135
+ "Behind": 22,
136
+ "Below": 30,
137
+ "Black": 58,
138
+ "Blue": 78,
139
  "Brown": 64,
140
  "Front": 52,
141
+ "Green": 10,
142
+ "Grey": 17,
143
+ "Left": 46,
144
+ "No": 96,
145
+ "Orange": 1,
146
+ "Pink": 66,
147
+ "Purple": 74,
148
+ "Red": 21,
149
+ "Right": 105,
150
+ "White": 93,
151
+ "Yellow": 16,
152
+ "Yes": 76,
153
+ "above": 92,
154
+ "airplane": 38,
155
+ "apple": 77,
156
+ "banana": 90,
157
+ "baseball bat": 97,
158
+ "baseball glove": 87,
159
+ "bear": 12,
160
+ "bed": 70,
161
+ "bench": 43,
162
+ "bicycle": 24,
163
+ "bird": 15,
164
+ "boat": 75,
165
+ "book": 59,
166
+ "bottle": 2,
167
+ "bowl": 33,
168
+ "broccoli": 72,
169
+ "bus": 7,
170
+ "cake": 51,
171
+ "car": 5,
172
+ "carrot": 67,
173
+ "cat": 88,
174
+ "cell phone": 71,
175
+ "chair": 101,
176
+ "clock": 53,
177
+ "couch": 62,
178
+ "cow": 29,
179
+ "cup": 102,
180
+ "dining table": 65,
181
+ "dog": 44,
182
+ "donut": 0,
183
+ "elephant": 41,
184
+ "fire hydrant": 84,
185
+ "fork": 6,
186
+ "frisbee": 81,
187
+ "giraffe": 103,
188
+ "handbag": 35,
189
+ "horse": 91,
190
+ "hot dog": 55,
191
+ "keyboard": 8,
192
+ "kite": 27,
193
+ "knife": 104,
194
+ "laptop": 3,
195
+ "left": 37,
196
+ "motorcycle": 48,
197
+ "mouse": 63,
198
+ "orange": 28,
199
+ "oven": 56,
200
+ "parking meter": 31,
201
+ "person": 98,
202
+ "pizza": 69,
203
+ "potted plant": 36,
204
+ "refrigerator": 99,
205
+ "remote": 26,
206
+ "right": 32,
207
+ "sandwich": 107,
208
+ "scissors": 73,
209
+ "sheep": 34,
210
+ "sink": 86,
211
+ "skateboard": 19,
212
+ "skis": 42,
213
+ "snowboard": 95,
214
+ "spoon": 23,
215
+ "sports ball": 82,
216
+ "stop sign": 79,
217
+ "suitcase": 18,
218
+ "surfboard": 68,
219
+ "teddy bear": 54,
220
+ "tennis racket": 50,
221
+ "tie": 61,
222
+ "toilet": 4,
223
+ "toothbrush": 57,
224
+ "traffic light": 94,
225
+ "train": 89,
226
+ "truck": 45,
227
+ "tv": 20,
228
+ "umbrella": 47,
229
+ "vase": 11,
230
+ "wine glass": 85,
231
+ "zebra": 100
232
  },
233
  "layer_norm_eps": 1e-12,
234
  "max_image_length": -1,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b4ad01f7ec4290d6aaa072d375a9ea97299dda1ef65481261db859bdd6d32b5
3
  size 451805856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dedc1b78fed2a462f78520a12f30806263dd5084a8e432619019ff4b234b827
3
  size 451805856