ondame
/

image-classifier

Image Classification

multi-head-classification

room-classification

computer-vision

scene-classification

Model card | Files and versions

justin-onda committed on Nov 14, 2025

Commit

553314a

·

1 Parent(s): 3662de4

model_info.json 현행화

Files changed (2) hide show

model.onnx +0 -3
model_info.json +25 -4

model.onnx DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:12651f636f6b372c3c5d7eb737e98e6db4a59b435686ac8606882fe0b56b455e
-size 1213423807

model_info.json CHANGED Viewed

@@ -1,10 +1,31 @@
 {
   "model_architecture": {
-    "backbone": "vit_large_patch14_dinov2.lvd142m",
     "feature_dim": 1024,
-    "total_parameters": 303252502,
-    "trainable_parameters": 24598,
-    "freeze_backbone": true
   },
   "input_specification": {
     "image_size": [

 {
   "model_architecture": {
+    "backbone": "facebook/dinov2-large",
+    "backbone_details": {
+      "model_type": "Vision Transformer (ViT)",
+      "variant": "Large",
+      "patch_size": 14,
+      "num_hidden_layers": 24,
+      "num_attention_heads": 16,
+      "hidden_size": 1024,
+      "intermediate_size": 4096,
+      "pretrained_image_size": 518,
+      "finetuned_image_size": 224
+    },
     "feature_dim": 1024,
+    "encoder_parameters": 304367634,
+    "head_trainable_parameters": 24598,
+    "freeze_backbone": true,
+    "encoder_output_shape": {
+      "description": "Encoder outputs full sequence of tokens including CLS token",
+      "raw_shape": ["batch_size", 257, 1024],
+      "tokens_breakdown": {
+        "cls_token": 1,
+        "patch_tokens": 256,
+        "total": 257
+      },
+      "usage": "CLS token (index 0) is extracted for feature representation"
+    }
   },
   "input_specification": {
     "image_size": [