vkuropiatnyk committed on
Commit
720dd0a
·
verified ·
1 Parent(s): 44fafc0

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +135 -0
  2. config.json +171 -0
  3. model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model:
4
+ - google/efficientnet-b0
5
+ ---
6
+
7
+
8
+ # EfficientNet-B0 Document Image Classifier
9
+
10
+ This is an image classification model based on **Google EfficientNet-B0**, fine-tuned to classify input images into one of the following 39 categories (the category set may be reduced in a future release):
11
+
12
+ 1. **bar_chart**
13
+ 2. **bar_code**
14
+ 3. **chemistry_structure**
15
+ 4. **flow_chart**
16
+ 5. **icon**
17
+ 6. **line_chart**
18
+ 7. **logo**
19
+ 8. **geographical_map**
20
+ 9. **topographical_map**
21
+ 10. **other**
22
+ 11. **pie_chart**
23
+ 12. **qr_code**
24
+ 13. **scatter_plot**
25
+ 14. **screenshot_from_manual**
26
+ 15. **screenshot_from_computer**
27
+ 16. **calendar**
28
+ 17. **crossword_puzzle**
29
+ 18. **signature**
30
+ 19. **stamp**
31
+ 20. **photograph**
32
+ 21. **engineering_drawing**
33
+ 22. **table**
34
+ 23. **full_page_image**
35
+ 24. **page_thumbnail**
36
+ 25. **music**
37
+ 26. **illustration**
38
+ 27. **treemap**
39
+ 28. **radar_chart**
40
+ 29. **screenshot_from_mobile**
41
+ 30. **sudoku_puzzle**
42
+ 31. **box_plot**
43
+ 32. **cryptoquote**
44
+ 33. **heatmap**
45
+ 34. **poster**
46
+ 35. **passport**
47
+ 36. **legend**
48
+ 37. **area_chart**
49
+ 38. **astrology_chart**
50
+ 39. **book cover**
51
+
52
+
53
+
54
+ ### How to use
55
+ Example of how to classify an image into one of the 39 classes:
56
+
57
+ ```python
58
+ import torch
59
+ import torchvision.transforms as transforms
60
+
61
+ from transformers import EfficientNetForImageClassification
62
+ from PIL import Image
63
+ import requests
64
+
65
+
66
+ urls = [
67
+ 'http://images.cocodataset.org/val2017/000000039769.jpg',
68
+ 'http://images.cocodataset.org/test-stuff2017/000000001750.jpg',
69
+ 'http://images.cocodataset.org/test-stuff2017/000000000001.jpg'
70
+ ]
71
+
72
+ image_processor = transforms.Compose(
73
+ [
74
+ transforms.Resize((224, 224)),
75
+ transforms.ToTensor(),
76
+ transforms.Normalize(
77
+ mean=[0.485, 0.456, 0.406],
78
+ std=[0.47853944, 0.4732864, 0.47434163],
79
+ ),
80
+ ]
81
+ )
82
+
83
+ images = []
84
+ for url in urls:
85
+ image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
86
+ image = image_processor(image)
87
+ images.append(image)
88
+
89
+
90
+ model_id = 'docling-project/DocumentFigureClassifier-v2.0'
91
+
92
+ model = EfficientNetForImageClassification.from_pretrained(model_id)
93
+
94
+ labels = model.config.id2label
95
+
96
+ device = torch.device("cpu")
97
+
98
+ torch_images = torch.stack(images).to(device)
99
+
100
+ with torch.no_grad():
101
+ logits = model(torch_images).logits # (batch_size, num_classes)
102
+ probs_batch = logits.softmax(dim=1) # (batch_size, num_classes)
103
+ probs_batch = probs_batch.cpu().numpy().tolist()
104
+
105
+ for idx, probs_image in enumerate(probs_batch):
106
+ preds = [(labels[i], prob) for i, prob in enumerate(probs_image)]
107
+ preds.sort(key=lambda t: t[1], reverse=True)
108
+ print(f"{idx}: {preds}")
109
+ ```
110
+
111
+
112
+
113
+ ## Citation
114
+ If you use this model in your work, please cite the following papers:
115
+
116
+ ```
117
+ @article{Tan2019EfficientNetRM,
118
+ title={EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks},
119
+ author={Mingxing Tan and Quoc V. Le},
120
+ journal={ArXiv},
121
+ year={2019},
122
+ volume={abs/1905.11946}
123
+ }
124
+
125
+ @techreport{Docling,
126
+ author = {Deep Search Team},
127
+ month = {8},
128
+ title = {{Docling Technical Report}},
129
+ url={https://arxiv.org/abs/2408.09869},
130
+ eprint={2408.09869},
131
+ doi = {10.48550/arXiv.2408.09869},
132
+ version = {1.0.0},
133
+ year = {2024}
134
+ }
135
+ ```
config.json ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "EfficientNetForImageClassification"
4
+ ],
5
+ "batch_norm_eps": 0.001,
6
+ "batch_norm_momentum": 0.99,
7
+ "depth_coefficient": 1.0,
8
+ "depth_divisor": 8,
9
+ "depthwise_padding": [],
10
+ "drop_connect_rate": 0.2,
11
+ "dropout_rate": 0.2,
12
+ "dtype": "float32",
13
+ "expand_ratios": [
14
+ 1,
15
+ 6,
16
+ 6,
17
+ 6,
18
+ 6,
19
+ 6,
20
+ 6
21
+ ],
22
+ "hidden_act": "swish",
23
+ "hidden_dim": 1280,
24
+ "id2label": {
25
+ "0": "bar_chart",
26
+ "1": "bar_code",
27
+ "10": "pie_chart",
28
+ "11": "qr_code",
29
+ "12": "scatter_plot",
30
+ "13": "screenshot_from_manual",
31
+ "14": "screenshot_from_computer",
32
+ "15": "calendar",
33
+ "16": "crossword_puzzle",
34
+ "17": "signature",
35
+ "18": "stamp",
36
+ "19": "photograph",
37
+ "2": "chemistry_structure",
38
+ "20": "engineering_drawing",
39
+ "21": "table",
40
+ "22": "full_page_image",
41
+ "23": "page_thumbnail",
42
+ "24": "music",
43
+ "25": "illustration",
44
+ "26": "treemap",
45
+ "27": "radar_chart",
46
+ "28": "screenshot_from_mobile",
47
+ "29": "sudoku_puzzle",
48
+ "3": "flow_chart",
49
+ "30": "box_plot",
50
+ "31": "cryptoquote",
51
+ "32": "heatmap",
52
+ "33": "poster",
53
+ "34": "passport",
54
+ "35": "legend",
55
+ "36": "area_chart",
56
+ "37": "astrology_chart",
57
+ "38": "book cover",
58
+ "4": "icon",
59
+ "5": "line_chart",
60
+ "6": "logo",
61
+ "7": "geographical_map",
62
+ "8": "topographical_map",
63
+ "9": "other"
64
+ },
65
+ "image_size": 224,
66
+ "in_channels": [
67
+ 32,
68
+ 16,
69
+ 24,
70
+ 40,
71
+ 80,
72
+ 112,
73
+ 192
74
+ ],
75
+ "initializer_range": 0.02,
76
+ "kernel_sizes": [
77
+ 3,
78
+ 3,
79
+ 5,
80
+ 3,
81
+ 5,
82
+ 5,
83
+ 3
84
+ ],
85
+ "label2id": {
86
+ "area_chart": "36",
87
+ "astrology_chart": "37",
88
+ "bar_chart": "0",
89
+ "bar_code": "1",
90
+ "book cover": "38",
91
+ "box_plot": "30",
92
+ "calendar": "15",
93
+ "chemistry_structure": "2",
94
+ "crossword_puzzle": "16",
95
+ "cryptoquote": "31",
96
+ "engineering_drawing": "20",
97
+ "flow_chart": "3",
98
+ "full_page_image": "22",
99
+ "geographical_map": "7",
100
+ "heatmap": "32",
101
+ "icon": "4",
102
+ "illustration": "25",
103
+ "legend": "35",
104
+ "line_chart": "5",
105
+ "logo": "6",
106
+ "music": "24",
107
+ "other": "9",
108
+ "page_thumbnail": "23",
109
+ "passport": "34",
110
+ "photograph": "19",
111
+ "pie_chart": "10",
112
+ "poster": "33",
113
+ "qr_code": "11",
114
+ "radar_chart": "27",
115
+ "scatter_plot": "12",
116
+ "screenshot_from_computer": "14",
117
+ "screenshot_from_manual": "13",
118
+ "screenshot_from_mobile": "28",
119
+ "signature": "17",
120
+ "stamp": "18",
121
+ "sudoku_puzzle": "29",
122
+ "table": "21",
123
+ "topographical_map": "8",
124
+ "treemap": "26"
125
+ },
126
+ "model_type": "efficientnet",
127
+ "num_block_repeats": [
128
+ 1,
129
+ 2,
130
+ 2,
131
+ 3,
132
+ 3,
133
+ 4,
134
+ 1
135
+ ],
136
+ "num_channels": 3,
137
+ "num_hidden_layers": 64,
138
+ "out_channels": [
139
+ 16,
140
+ 24,
141
+ 40,
142
+ 80,
143
+ 112,
144
+ 192,
145
+ 320
146
+ ],
147
+ "out_features": null,
148
+ "pooling_type": "mean",
149
+ "squeeze_expansion_ratio": 0.25,
150
+ "stage_names": [
151
+ "stem",
152
+ "stage1",
153
+ "stage2",
154
+ "stage3",
155
+ "stage4",
156
+ "stage5",
157
+ "stage6",
158
+ "stage7"
159
+ ],
160
+ "strides": [
161
+ 1,
162
+ 2,
163
+ 2,
164
+ 2,
165
+ 1,
166
+ 2,
167
+ 1
168
+ ],
169
+ "transformers_version": "4.57.3",
170
+ "width_coefficient": 1.0
171
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:441ff87d71573c0aea1f8d00537ae8b2c88baf4885674677f410de08db2bd547
3
+ size 16444820