prithivMLmods committed on Nov 13, 2025

Commit

5e074b4

verified ·

1 Parent(s): 7afabec

Upload folder using huggingface_hub

Browse files

Files changed (36) hide show

checkpoint-1022/config.json +62 -0
checkpoint-1022/model.safetensors +3 -0
checkpoint-1022/optimizer.pt +3 -0
checkpoint-1022/preprocessor_config.json +29 -0
checkpoint-1022/rng_state.pth +3 -0
checkpoint-1022/scheduler.pt +3 -0
checkpoint-1022/trainer_state.json +68 -0
checkpoint-1022/training_args.bin +3 -0
checkpoint-1533/config.json +62 -0
checkpoint-1533/model.safetensors +3 -0
checkpoint-1533/optimizer.pt +3 -0
checkpoint-1533/preprocessor_config.json +29 -0
checkpoint-1533/rng_state.pth +3 -0
checkpoint-1533/scheduler.pt +3 -0
checkpoint-1533/trainer_state.json +85 -0
checkpoint-1533/training_args.bin +3 -0
checkpoint-2044/config.json +62 -0
checkpoint-2044/model.safetensors +3 -0
checkpoint-2044/optimizer.pt +3 -0
checkpoint-2044/preprocessor_config.json +29 -0
checkpoint-2044/rng_state.pth +3 -0
checkpoint-2044/scheduler.pt +3 -0
checkpoint-2044/trainer_state.json +102 -0
checkpoint-2044/training_args.bin +3 -0
checkpoint-511/config.json +62 -0
checkpoint-511/model.safetensors +3 -0
checkpoint-511/optimizer.pt +3 -0
checkpoint-511/preprocessor_config.json +29 -0
checkpoint-511/rng_state.pth +3 -0
checkpoint-511/scheduler.pt +3 -0
checkpoint-511/trainer_state.json +51 -0
checkpoint-511/training_args.bin +3 -0
config.json +62 -0
model.safetensors +3 -0
preprocessor_config.json +29 -0
training_args.bin +3 -0

checkpoint-1022/config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "architectures": [
+    "MetaClip2ForImageClassification"
+  ],
+  "dtype": "float32",
+  "id2label": {
+    "0": "buildings",
+    "1": "forest",
+    "2": "glacier",
+    "3": "mountain",
+    "4": "sea",
+    "5": "street"
+  },
+  "initializer_factor": 1.0,
+  "label2id": {
+    "buildings": 0,
+    "forest": 1,
+    "glacier": 2,
+    "mountain": 3,
+    "sea": 4,
+    "street": 5
+  },
+  "logit_scale_init_value": 2.6592,
+  "model_type": "metaclip_2",
+  "problem_type": "single_label_classification",
+  "projection_dim": 384,
+  "text_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "eos_token_id": 2,
+    "hidden_act": "gelu",
+    "hidden_size": 384,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layer_norm_eps": 1e-05,
+    "max_position_embeddings": 77,
+    "model_type": "metaclip_2_text_model",
+    "num_attention_heads": 6,
+    "num_hidden_layers": 12,
+    "projection_dim": 384,
+    "vocab_size": 901629
+  },
+  "transformers_version": "4.57.1",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "hidden_act": "gelu",
+    "hidden_size": 384,
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layer_norm_eps": 1e-05,
+    "model_type": "metaclip_2_vision_model",
+    "num_attention_heads": 6,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "patch_size": 16,
+    "projection_dim": 384
+  }
+}

checkpoint-1022/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4445ef3607276fd300c733bc6b5d0b913b627090823bd3b3547372be70e1a003
+size 86697088

checkpoint-1022/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba6d7cd4ca0eebb89f661efefee79dcef2dc08a807db8f2f36d9c08d1c5e5d87
+size 173510411

checkpoint-1022/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "processor_class": "CLIPProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-1022/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:580121e158afb32d87a4935d1c33604c21690fe550c9ceba20ca570f3133ac35
+size 14645

checkpoint-1022/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e8e72b1ca276963c8724ed881dd1ba1083b317c8403109e46c75b22ab3a47e5
+size 1465

checkpoint-1022/trainer_state.json ADDED Viewed

	@@ -0,0 +1,68 @@

+{
+  "best_global_step": 1022,
+  "best_metric": 0.12706944346427917,
+  "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-1022",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1022,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.9784735812133072,
+      "grad_norm": 13.377490043640137,
+      "learning_rate": 1.5496489468405215e-05,
+      "loss": 0.3912,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.933435301315387,
+      "eval_loss": 0.18303145468235016,
+      "eval_model_preparation_time": 0.013,
+      "eval_runtime": 81.9678,
+      "eval_samples_per_second": 199.408,
+      "eval_steps_per_second": 24.937,
+      "step": 511
+    },
+    {
+      "epoch": 1.9569471624266144,
+      "grad_norm": 30.63884925842285,
+      "learning_rate": 1.0481444332998999e-05,
+      "loss": 0.1978,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9562557356989905,
+      "eval_loss": 0.12706944346427917,
+      "eval_model_preparation_time": 0.013,
+      "eval_runtime": 78.5588,
+      "eval_samples_per_second": 208.061,
+      "eval_steps_per_second": 26.019,
+      "step": 1022
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2044,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.375153034156032e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1022/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
+size 5777

checkpoint-1533/config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "architectures": [
+    "MetaClip2ForImageClassification"
+  ],
+  "dtype": "float32",
+  "id2label": {
+    "0": "buildings",
+    "1": "forest",
+    "2": "glacier",
+    "3": "mountain",
+    "4": "sea",
+    "5": "street"
+  },
+  "initializer_factor": 1.0,
+  "label2id": {
+    "buildings": 0,
+    "forest": 1,
+    "glacier": 2,
+    "mountain": 3,
+    "sea": 4,
+    "street": 5
+  },
+  "logit_scale_init_value": 2.6592,
+  "model_type": "metaclip_2",
+  "problem_type": "single_label_classification",
+  "projection_dim": 384,
+  "text_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "eos_token_id": 2,
+    "hidden_act": "gelu",
+    "hidden_size": 384,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layer_norm_eps": 1e-05,
+    "max_position_embeddings": 77,
+    "model_type": "metaclip_2_text_model",
+    "num_attention_heads": 6,
+    "num_hidden_layers": 12,
+    "projection_dim": 384,
+    "vocab_size": 901629
+  },
+  "transformers_version": "4.57.1",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "hidden_act": "gelu",
+    "hidden_size": 384,
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layer_norm_eps": 1e-05,
+    "model_type": "metaclip_2_vision_model",
+    "num_attention_heads": 6,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "patch_size": 16,
+    "projection_dim": 384
+  }
+}

checkpoint-1533/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f5281599aaef205fc8c4fa969920d30266a6014cd746868f3ecbb12a803f40c
+size 86697088

checkpoint-1533/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:004a43362ae89b1f7a070eb1da38b7a2c54f3e6acd554d60f273d0c82dc5041a
+size 173510411

checkpoint-1533/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "processor_class": "CLIPProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-1533/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d771439459010dd3de8d0bcd5d2033a970e9e366f92a7da2c92827bcb6c146e
+size 14645

checkpoint-1533/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffdc07a813ccb175943453a197bc878d6789a59b4a980cb9a40f867bb5b3df1f
+size 1465

checkpoint-1533/trainer_state.json ADDED Viewed

	@@ -0,0 +1,85 @@

+{
+  "best_global_step": 1533,
+  "best_metric": 0.09816861152648926,
+  "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-1533",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1533,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.9784735812133072,
+      "grad_norm": 13.377490043640137,
+      "learning_rate": 1.5496489468405215e-05,
+      "loss": 0.3912,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.933435301315387,
+      "eval_loss": 0.18303145468235016,
+      "eval_model_preparation_time": 0.013,
+      "eval_runtime": 81.9678,
+      "eval_samples_per_second": 199.408,
+      "eval_steps_per_second": 24.937,
+      "step": 511
+    },
+    {
+      "epoch": 1.9569471624266144,
+      "grad_norm": 30.63884925842285,
+      "learning_rate": 1.0481444332998999e-05,
+      "loss": 0.1978,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9562557356989905,
+      "eval_loss": 0.12706944346427917,
+      "eval_model_preparation_time": 0.013,
+      "eval_runtime": 78.5588,
+      "eval_samples_per_second": 208.061,
+      "eval_steps_per_second": 26.019,
+      "step": 1022
+    },
+    {
+      "epoch": 2.935420743639922,
+      "grad_norm": 18.30719757080078,
+      "learning_rate": 5.4663991975927785e-06,
+      "loss": 0.1506,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.96690119302539,
+      "eval_loss": 0.09816861152648926,
+      "eval_model_preparation_time": 0.013,
+      "eval_runtime": 78.111,
+      "eval_samples_per_second": 209.254,
+      "eval_steps_per_second": 26.168,
+      "step": 1533
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2044,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 9.562729551234048e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1533/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
+size 5777

checkpoint-2044/config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "architectures": [
+    "MetaClip2ForImageClassification"
+  ],
+  "dtype": "float32",
+  "id2label": {
+    "0": "buildings",
+    "1": "forest",
+    "2": "glacier",
+    "3": "mountain",
+    "4": "sea",
+    "5": "street"
+  },
+  "initializer_factor": 1.0,
+  "label2id": {
+    "buildings": 0,
+    "forest": 1,
+    "glacier": 2,
+    "mountain": 3,
+    "sea": 4,
+    "street": 5
+  },
+  "logit_scale_init_value": 2.6592,
+  "model_type": "metaclip_2",
+  "problem_type": "single_label_classification",
+  "projection_dim": 384,
+  "text_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "eos_token_id": 2,
+    "hidden_act": "gelu",
+    "hidden_size": 384,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layer_norm_eps": 1e-05,
+    "max_position_embeddings": 77,
+    "model_type": "metaclip_2_text_model",
+    "num_attention_heads": 6,
+    "num_hidden_layers": 12,
+    "projection_dim": 384,
+    "vocab_size": 901629
+  },
+  "transformers_version": "4.57.1",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "hidden_act": "gelu",
+    "hidden_size": 384,
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layer_norm_eps": 1e-05,
+    "model_type": "metaclip_2_vision_model",
+    "num_attention_heads": 6,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "patch_size": 16,
+    "projection_dim": 384
+  }
+}

checkpoint-2044/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e38b489c601470b226091b616cfc84875e04e8e51d5ebdb4698cae9348fd3da
+size 86697088

checkpoint-2044/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b447b823a16aae41fa179c5123e1a1bd0d8b2042209d413eea185484837cfaa7
+size 173510411

checkpoint-2044/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "processor_class": "CLIPProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-2044/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e033d9c7f6ff9779f056fa2bfbfeae3dd8cf43cfa8c2381b108c09bcf2c95ba7
+size 14645

checkpoint-2044/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c927e74abc05e1235c65211b2ede4c42421b1efa95a07bea360dcc8c819e7272
+size 1465

checkpoint-2044/trainer_state.json ADDED Viewed

	@@ -0,0 +1,102 @@

+{
+  "best_global_step": 2044,
+  "best_metric": 0.08643808960914612,
+  "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-2044",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 2044,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.9784735812133072,
+      "grad_norm": 13.377490043640137,
+      "learning_rate": 1.5496489468405215e-05,
+      "loss": 0.3912,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.933435301315387,
+      "eval_loss": 0.18303145468235016,
+      "eval_model_preparation_time": 0.013,
+      "eval_runtime": 81.9678,
+      "eval_samples_per_second": 199.408,
+      "eval_steps_per_second": 24.937,
+      "step": 511
+    },
+    {
+      "epoch": 1.9569471624266144,
+      "grad_norm": 30.63884925842285,
+      "learning_rate": 1.0481444332998999e-05,
+      "loss": 0.1978,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9562557356989905,
+      "eval_loss": 0.12706944346427917,
+      "eval_model_preparation_time": 0.013,
+      "eval_runtime": 78.5588,
+      "eval_samples_per_second": 208.061,
+      "eval_steps_per_second": 26.019,
+      "step": 1022
+    },
+    {
+      "epoch": 2.935420743639922,
+      "grad_norm": 18.30719757080078,
+      "learning_rate": 5.4663991975927785e-06,
+      "loss": 0.1506,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.96690119302539,
+      "eval_loss": 0.09816861152648926,
+      "eval_model_preparation_time": 0.013,
+      "eval_runtime": 78.111,
+      "eval_samples_per_second": 209.254,
+      "eval_steps_per_second": 26.168,
+      "step": 1533
+    },
+    {
+      "epoch": 3.9138943248532287,
+      "grad_norm": 15.605788230895996,
+      "learning_rate": 4.5135406218655974e-07,
+      "loss": 0.1113,
+      "step": 2000
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.9706332211685531,
+      "eval_loss": 0.08643808960914612,
+      "eval_model_preparation_time": 0.013,
+      "eval_runtime": 78.2892,
+      "eval_samples_per_second": 208.777,
+      "eval_steps_per_second": 26.108,
+      "step": 2044
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2044,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.2750306068312064e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2044/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
+size 5777

checkpoint-511/config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "architectures": [
+    "MetaClip2ForImageClassification"
+  ],
+  "dtype": "float32",
+  "id2label": {
+    "0": "buildings",
+    "1": "forest",
+    "2": "glacier",
+    "3": "mountain",
+    "4": "sea",
+    "5": "street"
+  },
+  "initializer_factor": 1.0,
+  "label2id": {
+    "buildings": 0,
+    "forest": 1,
+    "glacier": 2,
+    "mountain": 3,
+    "sea": 4,
+    "street": 5
+  },
+  "logit_scale_init_value": 2.6592,
+  "model_type": "metaclip_2",
+  "problem_type": "single_label_classification",
+  "projection_dim": 384,
+  "text_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "eos_token_id": 2,
+    "hidden_act": "gelu",
+    "hidden_size": 384,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layer_norm_eps": 1e-05,
+    "max_position_embeddings": 77,
+    "model_type": "metaclip_2_text_model",
+    "num_attention_heads": 6,
+    "num_hidden_layers": 12,
+    "projection_dim": 384,
+    "vocab_size": 901629
+  },
+  "transformers_version": "4.57.1",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "hidden_act": "gelu",
+    "hidden_size": 384,
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layer_norm_eps": 1e-05,
+    "model_type": "metaclip_2_vision_model",
+    "num_attention_heads": 6,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "patch_size": 16,
+    "projection_dim": 384
+  }
+}

checkpoint-511/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d793bb7e31fe1522ed7baa3af0fcc747982f07f02d3ad5594e2cc34905eb4f0
+size 86697088

checkpoint-511/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:abf453e6617df23ff86a1b6c31189074d6103e691daa93306e41fc66e417290a
+size 173510411

checkpoint-511/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "processor_class": "CLIPProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-511/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e13278bc0b31f9c6175e72aeb9d66009684ebad7bf515a7f30cef7c3f69d6dfd
+size 14645

checkpoint-511/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42a702253fe8084d1005ae19eaa0876667e9fa60b259cce6d5ff863d34659d5d
+size 1465

checkpoint-511/trainer_state.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "best_global_step": 511,
+  "best_metric": 0.18303145468235016,
+  "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-511",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 511,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.9784735812133072,
+      "grad_norm": 13.377490043640137,
+      "learning_rate": 1.5496489468405215e-05,
+      "loss": 0.3912,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.933435301315387,
+      "eval_loss": 0.18303145468235016,
+      "eval_model_preparation_time": 0.013,
+      "eval_runtime": 81.9678,
+      "eval_samples_per_second": 199.408,
+      "eval_steps_per_second": 24.937,
+      "step": 511
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2044,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.187576517078016e+17,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-511/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
+size 5777

config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "architectures": [
+    "MetaClip2ForImageClassification"
+  ],
+  "dtype": "float32",
+  "id2label": {
+    "0": "buildings",
+    "1": "forest",
+    "2": "glacier",
+    "3": "mountain",
+    "4": "sea",
+    "5": "street"
+  },
+  "initializer_factor": 1.0,
+  "label2id": {
+    "buildings": 0,
+    "forest": 1,
+    "glacier": 2,
+    "mountain": 3,
+    "sea": 4,
+    "street": 5
+  },
+  "logit_scale_init_value": 2.6592,
+  "model_type": "metaclip_2",
+  "problem_type": "single_label_classification",
+  "projection_dim": 384,
+  "text_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "eos_token_id": 2,
+    "hidden_act": "gelu",
+    "hidden_size": 384,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layer_norm_eps": 1e-05,
+    "max_position_embeddings": 77,
+    "model_type": "metaclip_2_text_model",
+    "num_attention_heads": 6,
+    "num_hidden_layers": 12,
+    "projection_dim": 384,
+    "vocab_size": 901629
+  },
+  "transformers_version": "4.57.1",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "dtype": "float32",
+    "hidden_act": "gelu",
+    "hidden_size": 384,
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 1536,
+    "layer_norm_eps": 1e-05,
+    "model_type": "metaclip_2_vision_model",
+    "num_attention_heads": 6,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "patch_size": 16,
+    "projection_dim": 384
+  }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e38b489c601470b226091b616cfc84875e04e8e51d5ebdb4698cae9348fd3da
+size 86697088

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "processor_class": "CLIPProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
+size 5777