thanks to yisol ❤

Files changed (2)

image_encoder/config.json CHANGED

@@ -1,25 +1,23 @@
 {
-  "_name_or_path": "camenduru/IDM-VTON-F16",
   "architectures": [
-    "CLIPTextModelWithProjection"
   ],
   "attention_dropout": 0.0,
-  "bos_token_id": 0,
   "dropout": 0.0,
-  "eos_token_id": 2,
   "hidden_act": "gelu",
   "hidden_size": 1280,
   "initializer_factor": 1.0,
   "initializer_range": 0.02,
   "intermediate_size": 5120,
   "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 77,
-  "model_type": "clip_text_model",
-  "num_attention_heads": 20,
   "num_hidden_layers": 32,
-  "pad_token_id": 1,
-  "projection_dim": 1280,
   "torch_dtype": "float16",
-  "transformers_version": "4.40.0",
-  "vocab_size": 49408
 }

 {
+  "_name_or_path": "yisol/IDM-VTON",
   "architectures": [
+    "CLIPVisionModelWithProjection"
   ],
   "attention_dropout": 0.0,
   "dropout": 0.0,
   "hidden_act": "gelu",
   "hidden_size": 1280,
+  "image_size": 224,
   "initializer_factor": 1.0,
   "initializer_range": 0.02,
   "intermediate_size": 5120,
   "layer_norm_eps": 1e-05,
+  "model_type": "clip_vision_model",
+  "num_attention_heads": 16,
+  "num_channels": 3,
   "num_hidden_layers": 32,
+  "patch_size": 14,
+  "projection_dim": 1024,
   "torch_dtype": "float16",
+  "transformers_version": "4.40.0"
 }

image_encoder/model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec310df2af79c318e24d20511b601a591ca8cd4f1fce1d8dff822a356bcdb1f4
-size 1389382176

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae616c24393dd1854372b0639e5541666f7521cbe219669255e865cb7f89466a
+size 1264217240