prithivMLmods committed on
Commit
5e074b4
·
verified ·
1 Parent(s): 7afabec

Upload folder using huggingface_hub

Browse files
checkpoint-1022/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "buildings",
8
+ "1": "forest",
9
+ "2": "glacier",
10
+ "3": "mountain",
11
+ "4": "sea",
12
+ "5": "street"
13
+ },
14
+ "initializer_factor": 1.0,
15
+ "label2id": {
16
+ "buildings": 0,
17
+ "forest": 1,
18
+ "glacier": 2,
19
+ "mountain": 3,
20
+ "sea": 4,
21
+ "street": 5
22
+ },
23
+ "logit_scale_init_value": 2.6592,
24
+ "model_type": "metaclip_2",
25
+ "problem_type": "single_label_classification",
26
+ "projection_dim": 384,
27
+ "text_config": {
28
+ "attention_dropout": 0.0,
29
+ "dtype": "float32",
30
+ "eos_token_id": 2,
31
+ "hidden_act": "gelu",
32
+ "hidden_size": 384,
33
+ "initializer_factor": 1.0,
34
+ "initializer_range": 0.02,
35
+ "intermediate_size": 1536,
36
+ "layer_norm_eps": 1e-05,
37
+ "max_position_embeddings": 77,
38
+ "model_type": "metaclip_2_text_model",
39
+ "num_attention_heads": 6,
40
+ "num_hidden_layers": 12,
41
+ "projection_dim": 384,
42
+ "vocab_size": 901629
43
+ },
44
+ "transformers_version": "4.57.1",
45
+ "vision_config": {
46
+ "attention_dropout": 0.0,
47
+ "dtype": "float32",
48
+ "hidden_act": "gelu",
49
+ "hidden_size": 384,
50
+ "image_size": 224,
51
+ "initializer_factor": 1.0,
52
+ "initializer_range": 0.02,
53
+ "intermediate_size": 1536,
54
+ "layer_norm_eps": 1e-05,
55
+ "model_type": "metaclip_2_vision_model",
56
+ "num_attention_heads": 6,
57
+ "num_channels": 3,
58
+ "num_hidden_layers": 12,
59
+ "patch_size": 16,
60
+ "projection_dim": 384
61
+ }
62
+ }
checkpoint-1022/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4445ef3607276fd300c733bc6b5d0b913b627090823bd3b3547372be70e1a003
3
+ size 86697088
checkpoint-1022/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba6d7cd4ca0eebb89f661efefee79dcef2dc08a807db8f2f36d9c08d1c5e5d87
3
+ size 173510411
checkpoint-1022/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-1022/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580121e158afb32d87a4935d1c33604c21690fe550c9ceba20ca570f3133ac35
3
+ size 14645
checkpoint-1022/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e8e72b1ca276963c8724ed881dd1ba1083b317c8403109e46c75b22ab3a47e5
3
+ size 1465
checkpoint-1022/trainer_state.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1022,
3
+ "best_metric": 0.12706944346427917,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-1022",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1022,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.9784735812133072,
14
+ "grad_norm": 13.377490043640137,
15
+ "learning_rate": 1.5496489468405215e-05,
16
+ "loss": 0.3912,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.933435301315387,
22
+ "eval_loss": 0.18303145468235016,
23
+ "eval_model_preparation_time": 0.013,
24
+ "eval_runtime": 81.9678,
25
+ "eval_samples_per_second": 199.408,
26
+ "eval_steps_per_second": 24.937,
27
+ "step": 511
28
+ },
29
+ {
30
+ "epoch": 1.9569471624266144,
31
+ "grad_norm": 30.63884925842285,
32
+ "learning_rate": 1.0481444332998999e-05,
33
+ "loss": 0.1978,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "eval_accuracy": 0.9562557356989905,
39
+ "eval_loss": 0.12706944346427917,
40
+ "eval_model_preparation_time": 0.013,
41
+ "eval_runtime": 78.5588,
42
+ "eval_samples_per_second": 208.061,
43
+ "eval_steps_per_second": 26.019,
44
+ "step": 1022
45
+ }
46
+ ],
47
+ "logging_steps": 500,
48
+ "max_steps": 2044,
49
+ "num_input_tokens_seen": 0,
50
+ "num_train_epochs": 4,
51
+ "save_steps": 500,
52
+ "stateful_callbacks": {
53
+ "TrainerControl": {
54
+ "args": {
55
+ "should_epoch_stop": false,
56
+ "should_evaluate": false,
57
+ "should_log": false,
58
+ "should_save": true,
59
+ "should_training_stop": false
60
+ },
61
+ "attributes": {}
62
+ }
63
+ },
64
+ "total_flos": 6.375153034156032e+17,
65
+ "train_batch_size": 32,
66
+ "trial_name": null,
67
+ "trial_params": null
68
+ }
checkpoint-1022/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
3
+ size 5777
checkpoint-1533/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "buildings",
8
+ "1": "forest",
9
+ "2": "glacier",
10
+ "3": "mountain",
11
+ "4": "sea",
12
+ "5": "street"
13
+ },
14
+ "initializer_factor": 1.0,
15
+ "label2id": {
16
+ "buildings": 0,
17
+ "forest": 1,
18
+ "glacier": 2,
19
+ "mountain": 3,
20
+ "sea": 4,
21
+ "street": 5
22
+ },
23
+ "logit_scale_init_value": 2.6592,
24
+ "model_type": "metaclip_2",
25
+ "problem_type": "single_label_classification",
26
+ "projection_dim": 384,
27
+ "text_config": {
28
+ "attention_dropout": 0.0,
29
+ "dtype": "float32",
30
+ "eos_token_id": 2,
31
+ "hidden_act": "gelu",
32
+ "hidden_size": 384,
33
+ "initializer_factor": 1.0,
34
+ "initializer_range": 0.02,
35
+ "intermediate_size": 1536,
36
+ "layer_norm_eps": 1e-05,
37
+ "max_position_embeddings": 77,
38
+ "model_type": "metaclip_2_text_model",
39
+ "num_attention_heads": 6,
40
+ "num_hidden_layers": 12,
41
+ "projection_dim": 384,
42
+ "vocab_size": 901629
43
+ },
44
+ "transformers_version": "4.57.1",
45
+ "vision_config": {
46
+ "attention_dropout": 0.0,
47
+ "dtype": "float32",
48
+ "hidden_act": "gelu",
49
+ "hidden_size": 384,
50
+ "image_size": 224,
51
+ "initializer_factor": 1.0,
52
+ "initializer_range": 0.02,
53
+ "intermediate_size": 1536,
54
+ "layer_norm_eps": 1e-05,
55
+ "model_type": "metaclip_2_vision_model",
56
+ "num_attention_heads": 6,
57
+ "num_channels": 3,
58
+ "num_hidden_layers": 12,
59
+ "patch_size": 16,
60
+ "projection_dim": 384
61
+ }
62
+ }
checkpoint-1533/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f5281599aaef205fc8c4fa969920d30266a6014cd746868f3ecbb12a803f40c
3
+ size 86697088
checkpoint-1533/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:004a43362ae89b1f7a070eb1da38b7a2c54f3e6acd554d60f273d0c82dc5041a
3
+ size 173510411
checkpoint-1533/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-1533/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d771439459010dd3de8d0bcd5d2033a970e9e366f92a7da2c92827bcb6c146e
3
+ size 14645
checkpoint-1533/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffdc07a813ccb175943453a197bc878d6789a59b4a980cb9a40f867bb5b3df1f
3
+ size 1465
checkpoint-1533/trainer_state.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1533,
3
+ "best_metric": 0.09816861152648926,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-1533",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1533,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.9784735812133072,
14
+ "grad_norm": 13.377490043640137,
15
+ "learning_rate": 1.5496489468405215e-05,
16
+ "loss": 0.3912,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.933435301315387,
22
+ "eval_loss": 0.18303145468235016,
23
+ "eval_model_preparation_time": 0.013,
24
+ "eval_runtime": 81.9678,
25
+ "eval_samples_per_second": 199.408,
26
+ "eval_steps_per_second": 24.937,
27
+ "step": 511
28
+ },
29
+ {
30
+ "epoch": 1.9569471624266144,
31
+ "grad_norm": 30.63884925842285,
32
+ "learning_rate": 1.0481444332998999e-05,
33
+ "loss": 0.1978,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "eval_accuracy": 0.9562557356989905,
39
+ "eval_loss": 0.12706944346427917,
40
+ "eval_model_preparation_time": 0.013,
41
+ "eval_runtime": 78.5588,
42
+ "eval_samples_per_second": 208.061,
43
+ "eval_steps_per_second": 26.019,
44
+ "step": 1022
45
+ },
46
+ {
47
+ "epoch": 2.935420743639922,
48
+ "grad_norm": 18.30719757080078,
49
+ "learning_rate": 5.4663991975927785e-06,
50
+ "loss": 0.1506,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 3.0,
55
+ "eval_accuracy": 0.96690119302539,
56
+ "eval_loss": 0.09816861152648926,
57
+ "eval_model_preparation_time": 0.013,
58
+ "eval_runtime": 78.111,
59
+ "eval_samples_per_second": 209.254,
60
+ "eval_steps_per_second": 26.168,
61
+ "step": 1533
62
+ }
63
+ ],
64
+ "logging_steps": 500,
65
+ "max_steps": 2044,
66
+ "num_input_tokens_seen": 0,
67
+ "num_train_epochs": 4,
68
+ "save_steps": 500,
69
+ "stateful_callbacks": {
70
+ "TrainerControl": {
71
+ "args": {
72
+ "should_epoch_stop": false,
73
+ "should_evaluate": false,
74
+ "should_log": false,
75
+ "should_save": true,
76
+ "should_training_stop": false
77
+ },
78
+ "attributes": {}
79
+ }
80
+ },
81
+ "total_flos": 9.562729551234048e+17,
82
+ "train_batch_size": 32,
83
+ "trial_name": null,
84
+ "trial_params": null
85
+ }
checkpoint-1533/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
3
+ size 5777
checkpoint-2044/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "buildings",
8
+ "1": "forest",
9
+ "2": "glacier",
10
+ "3": "mountain",
11
+ "4": "sea",
12
+ "5": "street"
13
+ },
14
+ "initializer_factor": 1.0,
15
+ "label2id": {
16
+ "buildings": 0,
17
+ "forest": 1,
18
+ "glacier": 2,
19
+ "mountain": 3,
20
+ "sea": 4,
21
+ "street": 5
22
+ },
23
+ "logit_scale_init_value": 2.6592,
24
+ "model_type": "metaclip_2",
25
+ "problem_type": "single_label_classification",
26
+ "projection_dim": 384,
27
+ "text_config": {
28
+ "attention_dropout": 0.0,
29
+ "dtype": "float32",
30
+ "eos_token_id": 2,
31
+ "hidden_act": "gelu",
32
+ "hidden_size": 384,
33
+ "initializer_factor": 1.0,
34
+ "initializer_range": 0.02,
35
+ "intermediate_size": 1536,
36
+ "layer_norm_eps": 1e-05,
37
+ "max_position_embeddings": 77,
38
+ "model_type": "metaclip_2_text_model",
39
+ "num_attention_heads": 6,
40
+ "num_hidden_layers": 12,
41
+ "projection_dim": 384,
42
+ "vocab_size": 901629
43
+ },
44
+ "transformers_version": "4.57.1",
45
+ "vision_config": {
46
+ "attention_dropout": 0.0,
47
+ "dtype": "float32",
48
+ "hidden_act": "gelu",
49
+ "hidden_size": 384,
50
+ "image_size": 224,
51
+ "initializer_factor": 1.0,
52
+ "initializer_range": 0.02,
53
+ "intermediate_size": 1536,
54
+ "layer_norm_eps": 1e-05,
55
+ "model_type": "metaclip_2_vision_model",
56
+ "num_attention_heads": 6,
57
+ "num_channels": 3,
58
+ "num_hidden_layers": 12,
59
+ "patch_size": 16,
60
+ "projection_dim": 384
61
+ }
62
+ }
checkpoint-2044/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e38b489c601470b226091b616cfc84875e04e8e51d5ebdb4698cae9348fd3da
3
+ size 86697088
checkpoint-2044/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b447b823a16aae41fa179c5123e1a1bd0d8b2042209d413eea185484837cfaa7
3
+ size 173510411
checkpoint-2044/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-2044/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e033d9c7f6ff9779f056fa2bfbfeae3dd8cf43cfa8c2381b108c09bcf2c95ba7
3
+ size 14645
checkpoint-2044/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c927e74abc05e1235c65211b2ede4c42421b1efa95a07bea360dcc8c819e7272
3
+ size 1465
checkpoint-2044/trainer_state.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2044,
3
+ "best_metric": 0.08643808960914612,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-2044",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2044,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.9784735812133072,
14
+ "grad_norm": 13.377490043640137,
15
+ "learning_rate": 1.5496489468405215e-05,
16
+ "loss": 0.3912,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.933435301315387,
22
+ "eval_loss": 0.18303145468235016,
23
+ "eval_model_preparation_time": 0.013,
24
+ "eval_runtime": 81.9678,
25
+ "eval_samples_per_second": 199.408,
26
+ "eval_steps_per_second": 24.937,
27
+ "step": 511
28
+ },
29
+ {
30
+ "epoch": 1.9569471624266144,
31
+ "grad_norm": 30.63884925842285,
32
+ "learning_rate": 1.0481444332998999e-05,
33
+ "loss": 0.1978,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "eval_accuracy": 0.9562557356989905,
39
+ "eval_loss": 0.12706944346427917,
40
+ "eval_model_preparation_time": 0.013,
41
+ "eval_runtime": 78.5588,
42
+ "eval_samples_per_second": 208.061,
43
+ "eval_steps_per_second": 26.019,
44
+ "step": 1022
45
+ },
46
+ {
47
+ "epoch": 2.935420743639922,
48
+ "grad_norm": 18.30719757080078,
49
+ "learning_rate": 5.4663991975927785e-06,
50
+ "loss": 0.1506,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 3.0,
55
+ "eval_accuracy": 0.96690119302539,
56
+ "eval_loss": 0.09816861152648926,
57
+ "eval_model_preparation_time": 0.013,
58
+ "eval_runtime": 78.111,
59
+ "eval_samples_per_second": 209.254,
60
+ "eval_steps_per_second": 26.168,
61
+ "step": 1533
62
+ },
63
+ {
64
+ "epoch": 3.9138943248532287,
65
+ "grad_norm": 15.605788230895996,
66
+ "learning_rate": 4.5135406218655974e-07,
67
+ "loss": 0.1113,
68
+ "step": 2000
69
+ },
70
+ {
71
+ "epoch": 4.0,
72
+ "eval_accuracy": 0.9706332211685531,
73
+ "eval_loss": 0.08643808960914612,
74
+ "eval_model_preparation_time": 0.013,
75
+ "eval_runtime": 78.2892,
76
+ "eval_samples_per_second": 208.777,
77
+ "eval_steps_per_second": 26.108,
78
+ "step": 2044
79
+ }
80
+ ],
81
+ "logging_steps": 500,
82
+ "max_steps": 2044,
83
+ "num_input_tokens_seen": 0,
84
+ "num_train_epochs": 4,
85
+ "save_steps": 500,
86
+ "stateful_callbacks": {
87
+ "TrainerControl": {
88
+ "args": {
89
+ "should_epoch_stop": false,
90
+ "should_evaluate": false,
91
+ "should_log": false,
92
+ "should_save": true,
93
+ "should_training_stop": true
94
+ },
95
+ "attributes": {}
96
+ }
97
+ },
98
+ "total_flos": 1.2750306068312064e+18,
99
+ "train_batch_size": 32,
100
+ "trial_name": null,
101
+ "trial_params": null
102
+ }
checkpoint-2044/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
3
+ size 5777
checkpoint-511/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "buildings",
8
+ "1": "forest",
9
+ "2": "glacier",
10
+ "3": "mountain",
11
+ "4": "sea",
12
+ "5": "street"
13
+ },
14
+ "initializer_factor": 1.0,
15
+ "label2id": {
16
+ "buildings": 0,
17
+ "forest": 1,
18
+ "glacier": 2,
19
+ "mountain": 3,
20
+ "sea": 4,
21
+ "street": 5
22
+ },
23
+ "logit_scale_init_value": 2.6592,
24
+ "model_type": "metaclip_2",
25
+ "problem_type": "single_label_classification",
26
+ "projection_dim": 384,
27
+ "text_config": {
28
+ "attention_dropout": 0.0,
29
+ "dtype": "float32",
30
+ "eos_token_id": 2,
31
+ "hidden_act": "gelu",
32
+ "hidden_size": 384,
33
+ "initializer_factor": 1.0,
34
+ "initializer_range": 0.02,
35
+ "intermediate_size": 1536,
36
+ "layer_norm_eps": 1e-05,
37
+ "max_position_embeddings": 77,
38
+ "model_type": "metaclip_2_text_model",
39
+ "num_attention_heads": 6,
40
+ "num_hidden_layers": 12,
41
+ "projection_dim": 384,
42
+ "vocab_size": 901629
43
+ },
44
+ "transformers_version": "4.57.1",
45
+ "vision_config": {
46
+ "attention_dropout": 0.0,
47
+ "dtype": "float32",
48
+ "hidden_act": "gelu",
49
+ "hidden_size": 384,
50
+ "image_size": 224,
51
+ "initializer_factor": 1.0,
52
+ "initializer_range": 0.02,
53
+ "intermediate_size": 1536,
54
+ "layer_norm_eps": 1e-05,
55
+ "model_type": "metaclip_2_vision_model",
56
+ "num_attention_heads": 6,
57
+ "num_channels": 3,
58
+ "num_hidden_layers": 12,
59
+ "patch_size": 16,
60
+ "projection_dim": 384
61
+ }
62
+ }
checkpoint-511/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d793bb7e31fe1522ed7baa3af0fcc747982f07f02d3ad5594e2cc34905eb4f0
3
+ size 86697088
checkpoint-511/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abf453e6617df23ff86a1b6c31189074d6103e691daa93306e41fc66e417290a
3
+ size 173510411
checkpoint-511/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-511/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e13278bc0b31f9c6175e72aeb9d66009684ebad7bf515a7f30cef7c3f69d6dfd
3
+ size 14645
checkpoint-511/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42a702253fe8084d1005ae19eaa0876667e9fa60b259cce6d5ff863d34659d5d
3
+ size 1465
checkpoint-511/trainer_state.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 511,
3
+ "best_metric": 0.18303145468235016,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-511",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 511,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.9784735812133072,
14
+ "grad_norm": 13.377490043640137,
15
+ "learning_rate": 1.5496489468405215e-05,
16
+ "loss": 0.3912,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.933435301315387,
22
+ "eval_loss": 0.18303145468235016,
23
+ "eval_model_preparation_time": 0.013,
24
+ "eval_runtime": 81.9678,
25
+ "eval_samples_per_second": 199.408,
26
+ "eval_steps_per_second": 24.937,
27
+ "step": 511
28
+ }
29
+ ],
30
+ "logging_steps": 500,
31
+ "max_steps": 2044,
32
+ "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 4,
34
+ "save_steps": 500,
35
+ "stateful_callbacks": {
36
+ "TrainerControl": {
37
+ "args": {
38
+ "should_epoch_stop": false,
39
+ "should_evaluate": false,
40
+ "should_log": false,
41
+ "should_save": true,
42
+ "should_training_stop": false
43
+ },
44
+ "attributes": {}
45
+ }
46
+ },
47
+ "total_flos": 3.187576517078016e+17,
48
+ "train_batch_size": 32,
49
+ "trial_name": null,
50
+ "trial_params": null
51
+ }
checkpoint-511/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
3
+ size 5777
config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "buildings",
8
+ "1": "forest",
9
+ "2": "glacier",
10
+ "3": "mountain",
11
+ "4": "sea",
12
+ "5": "street"
13
+ },
14
+ "initializer_factor": 1.0,
15
+ "label2id": {
16
+ "buildings": 0,
17
+ "forest": 1,
18
+ "glacier": 2,
19
+ "mountain": 3,
20
+ "sea": 4,
21
+ "street": 5
22
+ },
23
+ "logit_scale_init_value": 2.6592,
24
+ "model_type": "metaclip_2",
25
+ "problem_type": "single_label_classification",
26
+ "projection_dim": 384,
27
+ "text_config": {
28
+ "attention_dropout": 0.0,
29
+ "dtype": "float32",
30
+ "eos_token_id": 2,
31
+ "hidden_act": "gelu",
32
+ "hidden_size": 384,
33
+ "initializer_factor": 1.0,
34
+ "initializer_range": 0.02,
35
+ "intermediate_size": 1536,
36
+ "layer_norm_eps": 1e-05,
37
+ "max_position_embeddings": 77,
38
+ "model_type": "metaclip_2_text_model",
39
+ "num_attention_heads": 6,
40
+ "num_hidden_layers": 12,
41
+ "projection_dim": 384,
42
+ "vocab_size": 901629
43
+ },
44
+ "transformers_version": "4.57.1",
45
+ "vision_config": {
46
+ "attention_dropout": 0.0,
47
+ "dtype": "float32",
48
+ "hidden_act": "gelu",
49
+ "hidden_size": 384,
50
+ "image_size": 224,
51
+ "initializer_factor": 1.0,
52
+ "initializer_range": 0.02,
53
+ "intermediate_size": 1536,
54
+ "layer_norm_eps": 1e-05,
55
+ "model_type": "metaclip_2_vision_model",
56
+ "num_attention_heads": 6,
57
+ "num_channels": 3,
58
+ "num_hidden_layers": 12,
59
+ "patch_size": 16,
60
+ "projection_dim": 384
61
+ }
62
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e38b489c601470b226091b616cfc84875e04e8e51d5ebdb4698cae9348fd3da
3
+ size 86697088
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
3
+ size 5777