alterf committed on
Commit
2ba982c
·
verified ·
1 Parent(s): 861ce11

Upload TableTransformerForObjectDetection

Browse files
Files changed (2) hide show
  1. config.json +187 -17
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "microsoft/table-transformer-structure-recognition",
3
  "activation_dropout": 0.0,
4
  "activation_function": "relu",
5
  "architectures": [
@@ -7,13 +7,13 @@
7
  ],
8
  "attention_dropout": 0.0,
9
  "auxiliary_loss": false,
10
- "backbone": "resnet18",
11
  "backbone_config": null,
12
  "backbone_kwargs": null,
13
  "bbox_cost": 5,
14
  "bbox_loss_coefficient": 5,
15
- "ce_loss_coefficient": 1,
16
  "class_cost": 1,
 
17
  "d_model": 256,
18
  "decoder_attention_heads": 8,
19
  "decoder_ffn_dim": 2048,
@@ -26,34 +26,204 @@
26
  "encoder_ffn_dim": 2048,
27
  "encoder_layerdrop": 0.0,
28
  "encoder_layers": 6,
29
- "eos_coefficient": 0.4,
30
  "giou_cost": 2,
31
  "giou_loss_coefficient": 2,
32
  "id2label": {
33
- "0": "table",
34
- "1": "table column",
35
- "2": "table row",
36
- "3": "table column header",
37
- "4": "table projected row header",
38
- "5": "table spanning cell"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  },
40
  "init_std": 0.02,
41
  "init_xavier_std": 1.0,
42
  "is_encoder_decoder": true,
43
  "label2id": {
44
- "table": 0,
45
- "table column": 1,
46
- "table column header": 3,
47
- "table projected row header": 4,
48
- "table row": 2,
49
- "table spanning cell": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  },
51
  "mask_loss_coefficient": 1,
52
  "max_position_embeddings": 1024,
53
  "model_type": "table-transformer",
54
  "num_channels": 3,
55
  "num_hidden_layers": 6,
56
- "num_queries": 125,
57
  "position_embedding_type": "sine",
58
  "scale_embedding": false,
59
  "torch_dtype": "float32",
 
1
  {
2
+ "_name_or_path": "facebook/detr-resnet-50",
3
  "activation_dropout": 0.0,
4
  "activation_function": "relu",
5
  "architectures": [
 
7
  ],
8
  "attention_dropout": 0.0,
9
  "auxiliary_loss": false,
10
+ "backbone": "resnet50",
11
  "backbone_config": null,
12
  "backbone_kwargs": null,
13
  "bbox_cost": 5,
14
  "bbox_loss_coefficient": 5,
 
15
  "class_cost": 1,
16
+ "classifier_dropout": 0.0,
17
  "d_model": 256,
18
  "decoder_attention_heads": 8,
19
  "decoder_ffn_dim": 2048,
 
26
  "encoder_ffn_dim": 2048,
27
  "encoder_layerdrop": 0.0,
28
  "encoder_layers": 6,
29
+ "eos_coefficient": 0.1,
30
  "giou_cost": 2,
31
  "giou_loss_coefficient": 2,
32
  "id2label": {
33
+ "0": "N/A",
34
+ "1": "person",
35
+ "2": "bicycle",
36
+ "3": "car",
37
+ "4": "motorcycle",
38
+ "5": "airplane",
39
+ "6": "bus",
40
+ "7": "train",
41
+ "8": "truck",
42
+ "9": "boat",
43
+ "10": "traffic light",
44
+ "11": "fire hydrant",
45
+ "12": "street sign",
46
+ "13": "stop sign",
47
+ "14": "parking meter",
48
+ "15": "bench",
49
+ "16": "bird",
50
+ "17": "cat",
51
+ "18": "dog",
52
+ "19": "horse",
53
+ "20": "sheep",
54
+ "21": "cow",
55
+ "22": "elephant",
56
+ "23": "bear",
57
+ "24": "zebra",
58
+ "25": "giraffe",
59
+ "26": "hat",
60
+ "27": "backpack",
61
+ "28": "umbrella",
62
+ "29": "shoe",
63
+ "30": "eye glasses",
64
+ "31": "handbag",
65
+ "32": "tie",
66
+ "33": "suitcase",
67
+ "34": "frisbee",
68
+ "35": "skis",
69
+ "36": "snowboard",
70
+ "37": "sports ball",
71
+ "38": "kite",
72
+ "39": "baseball bat",
73
+ "40": "baseball glove",
74
+ "41": "skateboard",
75
+ "42": "surfboard",
76
+ "43": "tennis racket",
77
+ "44": "bottle",
78
+ "45": "plate",
79
+ "46": "wine glass",
80
+ "47": "cup",
81
+ "48": "fork",
82
+ "49": "knife",
83
+ "50": "spoon",
84
+ "51": "bowl",
85
+ "52": "banana",
86
+ "53": "apple",
87
+ "54": "sandwich",
88
+ "55": "orange",
89
+ "56": "broccoli",
90
+ "57": "carrot",
91
+ "58": "hot dog",
92
+ "59": "pizza",
93
+ "60": "donut",
94
+ "61": "cake",
95
+ "62": "chair",
96
+ "63": "couch",
97
+ "64": "potted plant",
98
+ "65": "bed",
99
+ "66": "mirror",
100
+ "67": "dining table",
101
+ "68": "window",
102
+ "69": "desk",
103
+ "70": "toilet",
104
+ "71": "door",
105
+ "72": "tv",
106
+ "73": "laptop",
107
+ "74": "mouse",
108
+ "75": "remote",
109
+ "76": "keyboard",
110
+ "77": "cell phone",
111
+ "78": "microwave",
112
+ "79": "oven",
113
+ "80": "toaster",
114
+ "81": "sink",
115
+ "82": "refrigerator",
116
+ "83": "blender",
117
+ "84": "book",
118
+ "85": "clock",
119
+ "86": "vase",
120
+ "87": "scissors",
121
+ "88": "teddy bear",
122
+ "89": "hair drier",
123
+ "90": "toothbrush"
124
  },
125
  "init_std": 0.02,
126
  "init_xavier_std": 1.0,
127
  "is_encoder_decoder": true,
128
  "label2id": {
129
+ "N/A": 0,
130
+ "airplane": 5,
131
+ "apple": 53,
132
+ "backpack": 27,
133
+ "banana": 52,
134
+ "baseball bat": 39,
135
+ "baseball glove": 40,
136
+ "bear": 23,
137
+ "bed": 65,
138
+ "bench": 15,
139
+ "bicycle": 2,
140
+ "bird": 16,
141
+ "blender": 83,
142
+ "boat": 9,
143
+ "book": 84,
144
+ "bottle": 44,
145
+ "bowl": 51,
146
+ "broccoli": 56,
147
+ "bus": 6,
148
+ "cake": 61,
149
+ "car": 3,
150
+ "carrot": 57,
151
+ "cat": 17,
152
+ "cell phone": 77,
153
+ "chair": 62,
154
+ "clock": 85,
155
+ "couch": 63,
156
+ "cow": 21,
157
+ "cup": 47,
158
+ "desk": 69,
159
+ "dining table": 67,
160
+ "dog": 18,
161
+ "donut": 60,
162
+ "door": 71,
163
+ "elephant": 22,
164
+ "eye glasses": 30,
165
+ "fire hydrant": 11,
166
+ "fork": 48,
167
+ "frisbee": 34,
168
+ "giraffe": 25,
169
+ "hair drier": 89,
170
+ "handbag": 31,
171
+ "hat": 26,
172
+ "horse": 19,
173
+ "hot dog": 58,
174
+ "keyboard": 76,
175
+ "kite": 38,
176
+ "knife": 49,
177
+ "laptop": 73,
178
+ "microwave": 78,
179
+ "mirror": 66,
180
+ "motorcycle": 4,
181
+ "mouse": 74,
182
+ "orange": 55,
183
+ "oven": 79,
184
+ "parking meter": 14,
185
+ "person": 1,
186
+ "pizza": 59,
187
+ "plate": 45,
188
+ "potted plant": 64,
189
+ "refrigerator": 82,
190
+ "remote": 75,
191
+ "sandwich": 54,
192
+ "scissors": 87,
193
+ "sheep": 20,
194
+ "shoe": 29,
195
+ "sink": 81,
196
+ "skateboard": 41,
197
+ "skis": 35,
198
+ "snowboard": 36,
199
+ "spoon": 50,
200
+ "sports ball": 37,
201
+ "stop sign": 13,
202
+ "street sign": 12,
203
+ "suitcase": 33,
204
+ "surfboard": 42,
205
+ "teddy bear": 88,
206
+ "tennis racket": 43,
207
+ "tie": 32,
208
+ "toaster": 80,
209
+ "toilet": 70,
210
+ "toothbrush": 90,
211
+ "traffic light": 10,
212
+ "train": 7,
213
+ "truck": 8,
214
+ "tv": 72,
215
+ "umbrella": 28,
216
+ "vase": 86,
217
+ "window": 68,
218
+ "wine glass": 46,
219
+ "zebra": 24
220
  },
221
  "mask_loss_coefficient": 1,
222
  "max_position_embeddings": 1024,
223
  "model_type": "table-transformer",
224
  "num_channels": 3,
225
  "num_hidden_layers": 6,
226
+ "num_queries": 100,
227
  "position_embedding_type": "sine",
228
  "scale_embedding": false,
229
  "torch_dtype": "float32",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a3ff8fba210d0cf46611b78e2934def5c2a306c71eda66adc4be6a2baf49482
3
- size 115433860
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97e99a6487a58fa25c30faa16e909378e05ffaed7d5bbf6db2192db0e96ff7e5
3
+ size 166589600