Shadow0482 committed on
Commit
3dd0c9e
·
verified ·
1 Parent(s): 4999641

Upload fine-tuned MobileViT-DR with ONNX

Browse files
README.md ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - vision
4
+ - image-classification
5
+ - onnx
6
+ - mobilevit
7
+ - medical
8
+ datasets:
9
+ - rohithgowdax/processed-dr
10
+ library_name: transformers
11
+ widget:
12
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg # placeholder image — replace with a sample retina scan
13
+ example_title: Example Eye Scan
14
+ ---
15
+
16
+ # Mithu-ViT: Diabetic Retinopathy Classifier
17
+
18
+ This is a **MobileViT (Small)** model fine-tuned on the [Processed Diabetic Retinopathy dataset](https://www.kaggle.com/datasets/rohithgowdax/processed-dr).
19
+
20
+ It classifies retina scans into 5 severity levels:
21
+ - **0**: No DR
22
+ - **1**: Mild
23
+ - **2**: Moderate
24
+ - **3**: Severe
25
+ - **4**: Proliferative DR
26
+
27
+ ## Model Details
28
+ - **Architecture**: MobileViT-Small (Apple)
29
+ - **Format**: PyTorch (`model.safetensors`) and ONNX (`mithu-vit.onnx`)
30
+ - **Resolution**: 256x256
31
+ - **License**: Apache 2.0
32
+
33
+ ## Usage (PyTorch)
34
+
35
+ ```python
36
+ from transformers import MobileViTForImageClassification, MobileViTImageProcessor
37
+ from PIL import Image
38
+ import torch
39
+
40
+ # 1. Load Model
41
+ model = MobileViTForImageClassification.from_pretrained("YOUR_USERNAME/mithu-mobilevit-dr")
42
+ processor = MobileViTImageProcessor.from_pretrained("YOUR_USERNAME/mithu-mobilevit-dr")
43
+
44
+ # 2. Load Image
45
+ image = Image.open("path_to_eye_scan.jpg").convert("RGB")
46
+
47
+ # 3. Predict
48
+ inputs = processor(images=image, return_tensors="pt")
49
+ with torch.no_grad():
50
+     outputs = model(**inputs)
51
+
52
+ print("Predicted Class:", model.config.id2label[outputs.logits.argmax(-1).item()])
53
+
54
+ ```
55
+
56
+ ## Usage (ONNX)
57
+
58
+ ```python
59
+ import onnxruntime as ort
60
+ import numpy as np
61
+ from PIL import Image
62
+
63
+ # 1. Start Session
64
+ session = ort.InferenceSession("mithu-vit.onnx")
65
+
66
+ # 2. Prepare Input
67
+ img = Image.open("test.jpg").convert("RGB").resize((256, 256))
68
+ img_data = np.array(img)[:, :, ::-1].transpose(2, 0, 1).astype(np.float32) / 255.0  # RGB->BGR flip to match do_flip_channel_order=true in preprocessor_config.json
69
+ img_data = np.expand_dims(img_data, axis=0)
70
+
71
+ # 3. Run
72
+ outputs = session.run(None, {"pixel_values": img_data})
73
+ print("Logits:", outputs[0])
74
+
75
+ ```
76
+
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MobileViTForImageClassification"
4
+ ],
5
+ "aspp_dropout_prob": 0.1,
6
+ "aspp_out_channels": 256,
7
+ "atrous_rates": [
8
+ 6,
9
+ 12,
10
+ 18
11
+ ],
12
+ "attention_probs_dropout_prob": 0.0,
13
+ "classifier_dropout_prob": 0.1,
14
+ "conv_kernel_size": 3,
15
+ "dtype": "float32",
16
+ "expand_ratio": 4.0,
17
+ "hidden_act": "silu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_sizes": [
20
+ 144,
21
+ 192,
22
+ 240
23
+ ],
24
+ "id2label": {
25
+ "0": "0",
26
+ "1": "1",
27
+ "2": "2",
28
+ "3": "3",
29
+ "4": "4"
30
+ },
31
+ "image_size": 256,
32
+ "initializer_range": 0.02,
33
+ "label2id": {
34
+ "0": "0",
35
+ "1": "1",
36
+ "2": "2",
37
+ "3": "3",
38
+ "4": "4"
39
+ },
40
+ "layer_norm_eps": 1e-05,
41
+ "mlp_ratio": 2.0,
42
+ "model_type": "mobilevit",
43
+ "neck_hidden_sizes": [
44
+ 16,
45
+ 32,
46
+ 64,
47
+ 96,
48
+ 128,
49
+ 160,
50
+ 640
51
+ ],
52
+ "num_attention_heads": 4,
53
+ "num_channels": 3,
54
+ "output_stride": 32,
55
+ "patch_size": 2,
56
+ "problem_type": "single_label_classification",
57
+ "qkv_bias": true,
58
+ "semantic_loss_ignore_index": 255,
59
+ "transformers_version": "4.57.1"
60
+ }
mithu-vit.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d29b9baa72245e419481fa0c225f55de5092546fbfc2f8ff6fd0e92ddb382ee
3
+ size 20029305
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94234ee5222fbdb8247a47c31b38994daf6a249edf350564134d70445c2b8095
3
+ size 19859260
preprocessor_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 256,
4
+ "width": 256
5
+ },
6
+ "do_center_crop": true,
7
+ "do_flip_channel_order": true,
8
+ "do_flip_channels": true,
9
+ "do_reduce_labels": false,
10
+ "do_rescale": true,
11
+ "do_resize": true,
12
+ "image_processor_type": "MobileViTImageProcessor",
13
+ "resample": 2,
14
+ "rescale_factor": 0.00392156862745098,
15
+ "size": {
16
+ "shortest_edge": 288
17
+ }
18
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:684f7993cad9ee69fedd1c8ac84f26605ef06bd143357b7c1a287785fb61548d
3
+ size 5841