Upload folder using huggingface_hub
Browse files- README.md +135 -0
- config.json +171 -0
- model.safetensors +3 -0
README.md
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
base_model:
|
| 4 |
+
- google/efficientnet-b0
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# EfficientNet-B0 Document Image Classifier
|
| 9 |
+
|
| 10 |
+
This is an image classification model based on **Google EfficientNet-B0**, fine-tuned to classify input images into one of the following 39 categories (this category set may be reduced in a future release):
|
| 11 |
+
|
| 12 |
+
1. **bar_chart**
|
| 13 |
+
2. **bar_code**
|
| 14 |
+
3. **chemistry_structure**
|
| 15 |
+
4. **flow_chart**
|
| 16 |
+
5. **icon**
|
| 17 |
+
6. **line_chart**
|
| 18 |
+
7. **logo**
|
| 19 |
+
8. **geographical_map**
|
| 20 |
+
9. **topographical_map**
|
| 21 |
+
10. **other**
|
| 22 |
+
11. **pie_chart**
|
| 23 |
+
12. **qr_code**
|
| 24 |
+
13. **scatter_plot**
|
| 25 |
+
14. **screenshot_from_manual**
|
| 26 |
+
15. **screenshot_from_computer**
|
| 27 |
+
16. **calendar**
|
| 28 |
+
17. **crossword_puzzle**
|
| 29 |
+
18. **signature**
|
| 30 |
+
19. **stamp**
|
| 31 |
+
20. **photograph**
|
| 32 |
+
21. **engineering_drawing**
|
| 33 |
+
22. **table**
|
| 34 |
+
23. **full_page_image**
|
| 35 |
+
24. **page_thumbnail**
|
| 36 |
+
25. **music**
|
| 37 |
+
26. **illustration**
|
| 38 |
+
27. **treemap**
|
| 39 |
+
28. **radar_chart**
|
| 40 |
+
29. **screenshot_from_mobile**
|
| 41 |
+
30. **sudoku_puzzle**
|
| 42 |
+
31. **box_plot**
|
| 43 |
+
32. **cryptoquote**
|
| 44 |
+
33. **heatmap**
|
| 45 |
+
34. **poster**
|
| 46 |
+
35. **passport**
|
| 47 |
+
36. **legend**
|
| 48 |
+
37. **area_chart**
|
| 49 |
+
38. **astrology_chart**
|
| 50 |
+
39. **book cover**
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
### How to use
|
| 55 |
+
Example of how to classify a batch of images into one of the 39 classes:
|
| 56 |
+
|
| 57 |
+
```python
|
| 58 |
+
import torch
|
| 59 |
+
import torchvision.transforms as transforms
|
| 60 |
+
|
| 61 |
+
from transformers import EfficientNetForImageClassification
|
| 62 |
+
from PIL import Image
|
| 63 |
+
import requests
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
urls = [
|
| 67 |
+
'http://images.cocodataset.org/val2017/000000039769.jpg',
|
| 68 |
+
'http://images.cocodataset.org/test-stuff2017/000000001750.jpg',
|
| 69 |
+
'http://images.cocodataset.org/test-stuff2017/000000000001.jpg'
|
| 70 |
+
]
|
| 71 |
+
|
| 72 |
+
image_processor = transforms.Compose(
|
| 73 |
+
[
|
| 74 |
+
transforms.Resize((224, 224)),
|
| 75 |
+
transforms.ToTensor(),
|
| 76 |
+
transforms.Normalize(
|
| 77 |
+
mean=[0.485, 0.456, 0.406],
|
| 78 |
+
std=[0.47853944, 0.4732864, 0.47434163],
|
| 79 |
+
),
|
| 80 |
+
]
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
images = []
|
| 84 |
+
for url in urls:
|
| 85 |
+
image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
|
| 86 |
+
image = image_processor(image)
|
| 87 |
+
images.append(image)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
model_id = 'docling-project/DocumentFigureClassifier-v2.0'
|
| 91 |
+
|
| 92 |
+
model = EfficientNetForImageClassification.from_pretrained(model_id)
|
| 93 |
+
|
| 94 |
+
labels = model.config.id2label
|
| 95 |
+
|
| 96 |
+
device = torch.device("cpu")
|
| 97 |
+
|
| 98 |
+
torch_images = torch.stack(images).to(device)
|
| 99 |
+
|
| 100 |
+
with torch.no_grad():
|
| 101 |
+
logits = model(torch_images).logits # (batch_size, num_classes)
|
| 102 |
+
probs_batch = logits.softmax(dim=1) # (batch_size, num_classes)
|
| 103 |
+
probs_batch = probs_batch.cpu().numpy().tolist()
|
| 104 |
+
|
| 105 |
+
for idx, probs_image in enumerate(probs_batch):
|
| 106 |
+
preds = [(labels[i], prob) for i, prob in enumerate(probs_image)]
|
| 107 |
+
preds.sort(key=lambda t: t[1], reverse=True)
|
| 108 |
+
print(f"{idx}: {preds}")
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
## Citation
|
| 114 |
+
If you use this model in your work, please cite the following papers:
|
| 115 |
+
|
| 116 |
+
```
|
| 117 |
+
@article{Tan2019EfficientNetRM,
|
| 118 |
+
title={EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks},
|
| 119 |
+
author={Mingxing Tan and Quoc V. Le},
|
| 120 |
+
journal={ArXiv},
|
| 121 |
+
year={2019},
|
| 122 |
+
volume={abs/1905.11946}
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
@techreport{Docling,
|
| 126 |
+
author = {Deep Search Team},
|
| 127 |
+
month = {8},
|
| 128 |
+
title = {{Docling Technical Report}},
|
| 129 |
+
url={https://arxiv.org/abs/2408.09869},
|
| 130 |
+
eprint={2408.09869},
|
| 131 |
+
doi = "10.48550/arXiv.2408.09869",
|
| 132 |
+
version = {1.0.0},
|
| 133 |
+
year = {2024}
|
| 134 |
+
}
|
| 135 |
+
```
|
config.json
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"EfficientNetForImageClassification"
|
| 4 |
+
],
|
| 5 |
+
"batch_norm_eps": 0.001,
|
| 6 |
+
"batch_norm_momentum": 0.99,
|
| 7 |
+
"depth_coefficient": 1.0,
|
| 8 |
+
"depth_divisor": 8,
|
| 9 |
+
"depthwise_padding": [],
|
| 10 |
+
"drop_connect_rate": 0.2,
|
| 11 |
+
"dropout_rate": 0.2,
|
| 12 |
+
"dtype": "float32",
|
| 13 |
+
"expand_ratios": [
|
| 14 |
+
1,
|
| 15 |
+
6,
|
| 16 |
+
6,
|
| 17 |
+
6,
|
| 18 |
+
6,
|
| 19 |
+
6,
|
| 20 |
+
6
|
| 21 |
+
],
|
| 22 |
+
"hidden_act": "swish",
|
| 23 |
+
"hidden_dim": 1280,
|
| 24 |
+
"id2label": {
|
| 25 |
+
"0": "bar_chart",
|
| 26 |
+
"1": "bar_code",
|
| 27 |
+
"10": "pie_chart",
|
| 28 |
+
"11": "qr_code",
|
| 29 |
+
"12": "scatter_plot",
|
| 30 |
+
"13": "screenshot_from_manual",
|
| 31 |
+
"14": "screenshot_from_computer",
|
| 32 |
+
"15": "calendar",
|
| 33 |
+
"16": "crossword_puzzle",
|
| 34 |
+
"17": "signature",
|
| 35 |
+
"18": "stamp",
|
| 36 |
+
"19": "photograph",
|
| 37 |
+
"2": "chemistry_structure",
|
| 38 |
+
"20": "engineering_drawing",
|
| 39 |
+
"21": "table",
|
| 40 |
+
"22": "full_page_image",
|
| 41 |
+
"23": "page_thumbnail",
|
| 42 |
+
"24": "music",
|
| 43 |
+
"25": "illustration",
|
| 44 |
+
"26": "treemap",
|
| 45 |
+
"27": "radar_chart",
|
| 46 |
+
"28": "screenshot_from_mobile",
|
| 47 |
+
"29": "sudoku_puzzle",
|
| 48 |
+
"3": "flow_chart",
|
| 49 |
+
"30": "box_plot",
|
| 50 |
+
"31": "cryptoquote",
|
| 51 |
+
"32": "heatmap",
|
| 52 |
+
"33": "poster",
|
| 53 |
+
"34": "passport",
|
| 54 |
+
"35": "legend",
|
| 55 |
+
"36": "area_chart",
|
| 56 |
+
"37": "astrology_chart",
|
| 57 |
+
"38": "book cover",
|
| 58 |
+
"4": "icon",
|
| 59 |
+
"5": "line_chart",
|
| 60 |
+
"6": "logo",
|
| 61 |
+
"7": "geographical_map",
|
| 62 |
+
"8": "topographical_map",
|
| 63 |
+
"9": "other"
|
| 64 |
+
},
|
| 65 |
+
"image_size": 224,
|
| 66 |
+
"in_channels": [
|
| 67 |
+
32,
|
| 68 |
+
16,
|
| 69 |
+
24,
|
| 70 |
+
40,
|
| 71 |
+
80,
|
| 72 |
+
112,
|
| 73 |
+
192
|
| 74 |
+
],
|
| 75 |
+
"initializer_range": 0.02,
|
| 76 |
+
"kernel_sizes": [
|
| 77 |
+
3,
|
| 78 |
+
3,
|
| 79 |
+
5,
|
| 80 |
+
3,
|
| 81 |
+
5,
|
| 82 |
+
5,
|
| 83 |
+
3
|
| 84 |
+
],
|
| 85 |
+
"label2id": {
|
| 86 |
+
"area_chart": "36",
|
| 87 |
+
"astrology_chart": "37",
|
| 88 |
+
"bar_chart": "0",
|
| 89 |
+
"bar_code": "1",
|
| 90 |
+
"book cover": "38",
|
| 91 |
+
"box_plot": "30",
|
| 92 |
+
"calendar": "15",
|
| 93 |
+
"chemistry_structure": "2",
|
| 94 |
+
"crossword_puzzle": "16",
|
| 95 |
+
"cryptoquote": "31",
|
| 96 |
+
"engineering_drawing": "20",
|
| 97 |
+
"flow_chart": "3",
|
| 98 |
+
"full_page_image": "22",
|
| 99 |
+
"geographical_map": "7",
|
| 100 |
+
"heatmap": "32",
|
| 101 |
+
"icon": "4",
|
| 102 |
+
"illustration": "25",
|
| 103 |
+
"legend": "35",
|
| 104 |
+
"line_chart": "5",
|
| 105 |
+
"logo": "6",
|
| 106 |
+
"music": "24",
|
| 107 |
+
"other": "9",
|
| 108 |
+
"page_thumbnail": "23",
|
| 109 |
+
"passport": "34",
|
| 110 |
+
"photograph": "19",
|
| 111 |
+
"pie_chart": "10",
|
| 112 |
+
"poster": "33",
|
| 113 |
+
"qr_code": "11",
|
| 114 |
+
"radar_chart": "27",
|
| 115 |
+
"scatter_plot": "12",
|
| 116 |
+
"screenshot_from_computer": "14",
|
| 117 |
+
"screenshot_from_manual": "13",
|
| 118 |
+
"screenshot_from_mobile": "28",
|
| 119 |
+
"signature": "17",
|
| 120 |
+
"stamp": "18",
|
| 121 |
+
"sudoku_puzzle": "29",
|
| 122 |
+
"table": "21",
|
| 123 |
+
"topographical_map": "8",
|
| 124 |
+
"treemap": "26"
|
| 125 |
+
},
|
| 126 |
+
"model_type": "efficientnet",
|
| 127 |
+
"num_block_repeats": [
|
| 128 |
+
1,
|
| 129 |
+
2,
|
| 130 |
+
2,
|
| 131 |
+
3,
|
| 132 |
+
3,
|
| 133 |
+
4,
|
| 134 |
+
1
|
| 135 |
+
],
|
| 136 |
+
"num_channels": 3,
|
| 137 |
+
"num_hidden_layers": 64,
|
| 138 |
+
"out_channels": [
|
| 139 |
+
16,
|
| 140 |
+
24,
|
| 141 |
+
40,
|
| 142 |
+
80,
|
| 143 |
+
112,
|
| 144 |
+
192,
|
| 145 |
+
320
|
| 146 |
+
],
|
| 147 |
+
"out_features": null,
|
| 148 |
+
"pooling_type": "mean",
|
| 149 |
+
"squeeze_expansion_ratio": 0.25,
|
| 150 |
+
"stage_names": [
|
| 151 |
+
"stem",
|
| 152 |
+
"stage1",
|
| 153 |
+
"stage2",
|
| 154 |
+
"stage3",
|
| 155 |
+
"stage4",
|
| 156 |
+
"stage5",
|
| 157 |
+
"stage6",
|
| 158 |
+
"stage7"
|
| 159 |
+
],
|
| 160 |
+
"strides": [
|
| 161 |
+
1,
|
| 162 |
+
2,
|
| 163 |
+
2,
|
| 164 |
+
2,
|
| 165 |
+
1,
|
| 166 |
+
2,
|
| 167 |
+
1
|
| 168 |
+
],
|
| 169 |
+
"transformers_version": "4.57.3",
|
| 170 |
+
"width_coefficient": 1.0
|
| 171 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:441ff87d71573c0aea1f8d00537ae8b2c88baf4885674677f410de08db2bd547
|
| 3 |
+
size 16444820
|