---
language: en
tags:
- image-classification
- document-classification
- tensorflow
- efficientnet
- computer-vision
license: mit
pipeline_tag: image-classification
library_name: tf-keras
---
# Document Classifier
A Keras EfficientNet model for classifying real-world document images into structured categories. Includes a full validation pipeline covering image quality checks and AI/fake image detection.

---
## How to use this model
```python
# Step 1 — Install dependencies
# pip install huggingface_hub tensorflow opencv-python pillow
# Step 2 — Copy and run this complete code
from huggingface_hub import snapshot_download
import tensorflow as tf
import numpy as np
import cv2
import json
from tensorflow.keras.applications.efficientnet import preprocess_input
# Download the model repository from Hugging Face (cached after the first run).
local_path = snapshot_download(repo_id="shailgsits/document-classifier")

# Load the model and take its default serving signature for inference.
# NOTE(review): tf.saved_model.load expects a SavedModel directory, while the
# "Files in this repo" table only lists a .keras file — confirm the repo root
# also ships the SavedModel artifacts.
model = tf.saved_model.load(local_path)
infer = model.signatures["serving_default"]

# class_index.json maps class name -> index; invert it for index -> name lookup.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default text encoding.
with open(f"{local_path}/class_index.json", encoding="utf-8") as f:
    class_indices = json.load(f)
LABELS = {int(v): k for k, v in class_indices.items()}

# Display names for the raw class names stored in class_index.json.
DOCUMENT_TYPE_LABELS = {
    "1_visiting_card": "Visiting Card",
    "2_prescription": "Prescription",
    "3_shop_banner": "Shop Banner",
    "4_invalid_image": "Invalid",
}
def predict(image_path: str, min_confidence: float = 0.75) -> dict:
    """Classify the document image stored at *image_path*.

    Args:
        image_path: Path to an image file that OpenCV can read.
        min_confidence: Minimum top-class probability (0-1) for the result
            to be reported as ``"VALID"``; anything lower is reported as
            ``"LOW_CONFIDENCE"``.

    Returns:
        ``{"status": "ERROR", "message": "Could not read image"}`` when the
        file cannot be decoded. Otherwise a dict with:

        * ``status`` - ``"VALID"`` or ``"LOW_CONFIDENCE"``. This reflects the
          confidence threshold only, not the predicted class: a confident
          ``4_invalid_image`` prediction is still reported as ``"VALID"``.
        * ``document_type`` - raw class name (``"unknown"`` if the index is
          missing from ``LABELS``).
        * ``document_type_label`` - display name for that class.
        * ``confidence`` - top-class score as a percentage.
        * ``all_scores`` - score per class as a percentage, keyed by
          display name.
    """
    img = cv2.imread(image_path)
    if img is None:
        return {"status": "ERROR", "message": "Could not read image"}

    # cv2.imread yields BGR; this example feeds the model RGB.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # NOTE(review): a plain resize stretches the image to 224x224, whereas the
    # "Preprocessing Details" section describes letterboxing — confirm which
    # one matches training.
    resized = cv2.resize(img_rgb, (224, 224))
    batch = np.expand_dims(resized.astype(np.float32), axis=0)
    batch = preprocess_input(batch)

    outputs = infer(tf.constant(batch))
    # The signature returns a dict of outputs; use the first one and drop the
    # batch dimension.
    preds = next(iter(outputs.values())).numpy()[0]

    class_id = int(np.argmax(preds))
    confidence = float(preds[class_id])
    label = LABELS.get(class_id, "unknown")
    friendly = DOCUMENT_TYPE_LABELS.get(label, label)

    # Tolerate indices missing from class_index.json here as well, so the
    # score table cannot raise KeyError when the top-class lookup would not.
    # The index is kept in the fallback name so unknown classes do not collide.
    all_scores = {}
    for i, p in enumerate(preds):
        name = LABELS.get(i, f"unknown_{i}")
        all_scores[DOCUMENT_TYPE_LABELS.get(name, name)] = round(float(p) * 100, 2)

    return {
        "status": "VALID" if confidence >= min_confidence else "LOW_CONFIDENCE",
        "document_type": label,
        "document_type_label": friendly,
        "confidence": round(confidence * 100, 2),
        "all_scores": all_scores,
    }
# --- Run prediction ---
# Replace "your_image.jpg" with the path of the image you want to classify.
result = predict("your_image.jpg")
print(result)
# Example output (confidence and all_scores values are percentages):
# {
#     'status': 'VALID',
#     'document_type': '1_visiting_card',
#     'document_type_label': 'Visiting Card',
#     'confidence': 97.43,
#     'all_scores': {'Visiting Card': 97.43, 'Prescription': 1.2, 'Shop Banner': 0.9, 'Invalid': 0.47}
# }
```
---
## Supported Document Types
| Label | Description |
|---|---|
| `visiting_card` | Business / name cards |
| `prescription` | Medical prescriptions |
| `shop_banner` | Storefront signage, banners |
| `invalid_image` | Rejected / unrecognized documents |
---
## Files in this repo
| File | Description |
|---|---|
| `document_classifier_final.keras` | Trained Keras model (EfficientNet) |
| `class_index.json` | Class name → index mapping |
---
## Quick Test in Google Colab
```python
!pip install huggingface_hub tensorflow pillow opencv-python requests -q
import tensorflow as tf, numpy as np, cv2, requests, json
from PIL import Image
from io import BytesIO
from huggingface_hub import hf_hub_download
from tensorflow.keras.applications.efficientnet import preprocess_input
# Load model + class mapping
model = tf.keras.models.load_model(
    hf_hub_download("shailgsits/document-classifier", "document_classifier_final.keras")
)
with open(
    hf_hub_download("shailgsits/document-classifier", "class_index.json"),
    encoding="utf-8",
) as f:
    # class_index.json maps class name -> index. Invert it, coercing each
    # index to int so a lookup with the integer argmax cannot miss, and drop
    # the text up to the first underscore (presumably a numeric prefix such as
    # "1_" — verify against the file). Keys without an underscore are kept
    # unchanged instead of raising IndexError.
    index_to_label = {
        int(v): k.split("_", 1)[-1] for k, v in json.load(f).items()
    }
# Predict from any image URL
def predict_from_url(url: str, timeout: float = 30.0):
    """Download the image at *url* and classify it.

    Args:
        url: Direct link to an image file.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with ``label`` (class name) and ``confidence`` (top-class
        score as a percentage).

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here rather than handing an error page to PIL.
    response.raise_for_status()

    rgb = np.array(Image.open(BytesIO(response.content)).convert("RGB"))
    # Reverse the channel axis (RGB -> BGR) and make the result contiguous,
    # since the reversed slice is a negative-stride view.
    img = np.ascontiguousarray(rgb[:, :, ::-1])

    # Letterbox: scale to fit inside 224x224 keeping the aspect ratio, then
    # centre the result on a white canvas.
    h, w = img.shape[:2]
    scale = min(224 / w, 224 / h)
    # Clamp to 1 px so extreme aspect ratios cannot produce a zero-sized resize.
    nw, nh = max(1, int(w * scale)), max(1, int(h * scale))
    res = cv2.resize(img, (nw, nh))
    canvas = np.full((224, 224, 3), 255, np.uint8)
    top, left = (224 - nh) // 2, (224 - nw) // 2
    canvas[top:top + nh, left:left + nw] = res

    input_arr = preprocess_input(np.expand_dims(canvas.astype(np.float32), 0))
    pred = model.predict(input_arr)[0]
    idx = int(np.argmax(pred))
    return {"label": index_to_label[idx], "confidence": round(float(pred[idx]) * 100, 2)}
# Test with a Google Drive image
# Replace YOUR_FILE_ID with the Drive file ID of your image.
url = "https://drive.google.com/uc?export=download&id=YOUR_FILE_ID"
print(predict_from_url(url))
# Example output:
# {'label': 'visiting_card', 'confidence': 97.43}
```
---
## Predict from local file (Colab upload)
```python
from google.colab import files

# Open Colab's upload dialog; the result is keyed by uploaded filename.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded")
image_path = next(iter(uploaded))

img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be decoded.
if img is None:
    raise ValueError(f"Could not read image: {image_path}")

# Letterbox: scale to fit inside 224x224 keeping the aspect ratio, then centre
# the result on a white canvas.
h, w = img.shape[:2]
scale = min(224 / w, 224 / h)
# Clamp to 1 px so extreme aspect ratios cannot produce a zero-sized resize.
nw, nh = max(1, int(w * scale)), max(1, int(h * scale))
res = cv2.resize(img, (nw, nh))
canvas = np.full((224, 224, 3), 255, np.uint8)
top, left = (224 - nh) // 2, (224 - nw) // 2
canvas[top:top + nh, left:left + nw] = res

input_arr = preprocess_input(np.expand_dims(canvas.astype(np.float32), 0))
pred = model.predict(input_arr)[0]
idx = int(np.argmax(pred))
print({"label": index_to_label[idx], "confidence": round(float(pred[idx]) * 100, 2)})
```
---
## Preprocessing Details
Images are resized with **letterboxing** (aspect-ratio preserved, white padding) to 224×224, then passed through `EfficientNet`'s `preprocess_input`.

---
## Validation Pipeline
Before inference, every image passes through:
| Check | Condition |
|---|---|
| Blank image | Grayscale std < 12 |
| Blurry image | Laplacian variance < 10 |
| Ruled paper | ≥5 evenly-spaced horizontal lines |
| No text detected | Fewer than 6 connected text components |
| AI metadata | EXIF/XMP contains AI tool keywords |
| Screenshot/UI | >55% near-white pixels |
| AI watermark | OCR detects generator text in bottom strip |
| Gemini sparkle | Sparkle artifact in bottom-right corner |
| AI staged background | Card/background sharpness ratio > 5.0 |
| Perspective tilt | >35% lines in 15°–45° diagonal range |
| DCT frequency | High-freq energy ratio > 0.12 |
| Texture uniformity | Patch variance CV < 0.4 and mean var < 50 |
---
## License
MIT

---
## Author
Developed and trained by **[Shailendra Singh Tiwari](https://www.linkedin.com/in/shailendra-singh-tiwari/)**