Shawon16 committed on
Commit
2c0fe5f
·
verified ·
1 Parent(s): 3077f48

Model save

Browse files
README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: cc-by-nc-4.0
4
+ base_model: MCG-NJU/videomae-base-finetuned-kinetics
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - accuracy
9
+ model-index:
10
+ - name: VideoMAE_Kinetics_fold__2__BdSLW60_SKF
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # VideoMAE_Kinetics_fold__2__BdSLW60_SKF
18
+
19
+ This model is a fine-tuned version of [MCG-NJU/videomae-base-finetuned-kinetics](https://huggingface.co/MCG-NJU/videomae-base-finetuned-kinetics) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.0307
22
+ - Accuracy: 0.9925
23
+
24
+ ## Model description
25
+
26
+ More information needed
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
+ More information needed
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 5e-05
42
+ - train_batch_size: 2
43
+ - eval_batch_size: 2
44
+ - seed: 42
45
+ - gradient_accumulation_steps: 4
46
+ - total_train_batch_size: 8
47
+ optimizer: adamw_torch with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
48
+ - lr_scheduler_type: linear
49
+ - lr_scheduler_warmup_ratio: 0.1
50
+ - training_steps: 9030
51
+ - mixed_precision_training: Native AMP
52
+
53
+ ### Training results
54
+
55
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
56
+ |:-------------:|:------:|:----:|:---------------:|:--------:|
57
+ | 8.3956 | 0.1 | 903 | 0.3172 | 0.9601 |
58
+ | 0.408 | 1.1001 | 1807 | 0.0426 | 0.9888 |
59
+ | 0.0963 | 2.1 | 2710 | 0.0403 | 0.9900 |
60
+ | 0.051 | 3.1001 | 3614 | 0.0611 | 0.9813 |
61
+ | 0.0393 | 4.1 | 4517 | 0.0298 | 0.9950 |
62
+ | 0.0309 | 5.1001 | 5421 | 0.0453 | 0.9863 |
63
+ | 0.0061 | 6.1 | 6324 | 0.0409 | 0.9875 |
64
+ | 0.0122 | 7.1001 | 7228 | 0.0552 | 0.9913 |
65
+ | 0.0039 | 8.1 | 8131 | 0.0272 | 0.9938 |
66
+ | 0.0066 | 9.0995 | 9030 | 0.0307 | 0.9925 |
67
+
68
+
69
+ ### Framework versions
70
+
71
+ - Transformers 4.46.1
72
+ - Pytorch 2.5.1+cu124
73
+ - Datasets 3.1.0
74
+ - Tokenizers 0.20.1
checkpoint/config.json ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "MCG-NJU/videomae-base-finetuned-kinetics",
3
+ "architectures": [
4
+ "VideoMAEForVideoClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "decoder_hidden_size": 384,
8
+ "decoder_intermediate_size": 1536,
9
+ "decoder_num_attention_heads": 6,
10
+ "decoder_num_hidden_layers": 4,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.0,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "aam",
16
+ "1": "aaple",
17
+ "2": "ac",
18
+ "3": "aids",
19
+ "4": "alu",
20
+ "5": "anaros",
21
+ "6": "angur",
22
+ "7": "apartment",
23
+ "8": "attio",
24
+ "9": "audio cassette",
25
+ "10": "ayna",
26
+ "11": "baandej",
27
+ "12": "baat",
28
+ "13": "baba",
29
+ "14": "balti",
30
+ "15": "balu",
31
+ "16": "bhai",
32
+ "17": "biscuts",
33
+ "18": "bon",
34
+ "19": "boroi",
35
+ "20": "bottam",
36
+ "21": "bou",
37
+ "22": "cake",
38
+ "23": "capsule",
39
+ "24": "cha",
40
+ "25": "chacha",
41
+ "26": "chachi",
42
+ "27": "chadar",
43
+ "28": "chal",
44
+ "29": "chikissha",
45
+ "30": "chini",
46
+ "31": "chips",
47
+ "32": "chiruni",
48
+ "33": "chocolate",
49
+ "34": "chokh utha",
50
+ "35": "chosma",
51
+ "36": "churi",
52
+ "37": "clip",
53
+ "38": "cream",
54
+ "39": "dada",
55
+ "40": "dadi",
56
+ "41": "daeitto",
57
+ "42": "dal",
58
+ "43": "debor",
59
+ "44": "denadar",
60
+ "45": "dengue",
61
+ "46": "doctor",
62
+ "47": "dongson",
63
+ "48": "dulavai",
64
+ "49": "durbol",
65
+ "50": "jomoj",
66
+ "51": "juta",
67
+ "52": "konna",
68
+ "53": "maa",
69
+ "54": "tattha",
70
+ "55": "toothpaste",
71
+ "56": "tshirt",
72
+ "57": "tubelight",
73
+ "58": "tupi",
74
+ "59": "tv"
75
+ },
76
+ "image_size": 224,
77
+ "initializer_range": 0.02,
78
+ "intermediate_size": 3072,
79
+ "label2id": {
80
+ "aam": 0,
81
+ "aaple": 1,
82
+ "ac": 2,
83
+ "aids": 3,
84
+ "alu": 4,
85
+ "anaros": 5,
86
+ "angur": 6,
87
+ "apartment": 7,
88
+ "attio": 8,
89
+ "audio cassette": 9,
90
+ "ayna": 10,
91
+ "baandej": 11,
92
+ "baat": 12,
93
+ "baba": 13,
94
+ "balti": 14,
95
+ "balu": 15,
96
+ "bhai": 16,
97
+ "biscuts": 17,
98
+ "bon": 18,
99
+ "boroi": 19,
100
+ "bottam": 20,
101
+ "bou": 21,
102
+ "cake": 22,
103
+ "capsule": 23,
104
+ "cha": 24,
105
+ "chacha": 25,
106
+ "chachi": 26,
107
+ "chadar": 27,
108
+ "chal": 28,
109
+ "chikissha": 29,
110
+ "chini": 30,
111
+ "chips": 31,
112
+ "chiruni": 32,
113
+ "chocolate": 33,
114
+ "chokh utha": 34,
115
+ "chosma": 35,
116
+ "churi": 36,
117
+ "clip": 37,
118
+ "cream": 38,
119
+ "dada": 39,
120
+ "dadi": 40,
121
+ "daeitto": 41,
122
+ "dal": 42,
123
+ "debor": 43,
124
+ "denadar": 44,
125
+ "dengue": 45,
126
+ "doctor": 46,
127
+ "dongson": 47,
128
+ "dulavai": 48,
129
+ "durbol": 49,
130
+ "jomoj": 50,
131
+ "juta": 51,
132
+ "konna": 52,
133
+ "maa": 53,
134
+ "tattha": 54,
135
+ "toothpaste": 55,
136
+ "tshirt": 56,
137
+ "tubelight": 57,
138
+ "tupi": 58,
139
+ "tv": 59
140
+ },
141
+ "layer_norm_eps": 1e-12,
142
+ "model_type": "videomae",
143
+ "norm_pix_loss": false,
144
+ "num_attention_heads": 12,
145
+ "num_channels": 3,
146
+ "num_frames": 16,
147
+ "num_hidden_layers": 12,
148
+ "patch_size": 16,
149
+ "problem_type": "single_label_classification",
150
+ "qkv_bias": true,
151
+ "torch_dtype": "float32",
152
+ "transformers_version": "4.46.1",
153
+ "tubelet_size": 2,
154
+ "use_mean_pooling": true
155
+ }
checkpoint/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccb09cd2dbf0e268ad28604230cd9e174b180d55f16912851dabebe61a0b2678
3
+ size 345115752
checkpoint/preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_normalize": true,
8
+ "do_rescale": true,
9
+ "do_resize": true,
10
+ "image_mean": [
11
+ 0.485,
12
+ 0.456,
13
+ 0.406
14
+ ],
15
+ "image_processor_type": "VideoMAEImageProcessor",
16
+ "image_std": [
17
+ 0.229,
18
+ 0.224,
19
+ 0.225
20
+ ],
21
+ "resample": 2,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "shortest_edge": 224
25
+ }
26
+ }
checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e84d7b92ddf35c294748ce436b84105af0566d64354ad9929b2f6ff1c9f303d
3
+ size 5368
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acf4268e88e3318357e728b7b3c813bc81a63bf6cdf0869fed2c71d755aa243a
3
  size 345115752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccb09cd2dbf0e268ad28604230cd9e174b180d55f16912851dabebe61a0b2678
3
  size 345115752