karl-wang commited on
Commit
9e025ef
·
verified ·
1 Parent(s): d10dd49

Upload folder using huggingface_hub

Browse files
models/.DS_Store ADDED
Binary file (6.15 kB). View file
 
models/checkpoint-15000/.DS_Store ADDED
Binary file (6.15 kB). View file
 
models/checkpoint-15000/config.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": "a0b4534a14f58e20944452dff00a22a06ce629d1",
3
+ "_name_or_path": "laion/larger_clap_music",
4
+ "architectures": [
5
+ "ClapModel"
6
+ ],
7
+ "audio_config": {
8
+ "_name_or_path": "",
9
+ "add_cross_attention": false,
10
+ "aff_block_r": 4,
11
+ "architectures": null,
12
+ "attention_probs_dropout_prob": 0.0,
13
+ "bad_words_ids": null,
14
+ "begin_suppress_tokens": null,
15
+ "bos_token_id": null,
16
+ "chunk_size_feed_forward": 0,
17
+ "cross_attention_hidden_size": null,
18
+ "decoder_start_token_id": null,
19
+ "depths": [
20
+ 2,
21
+ 2,
22
+ 12,
23
+ 2
24
+ ],
25
+ "diversity_penalty": 0.0,
26
+ "do_sample": false,
27
+ "drop_path_rate": 0.0,
28
+ "early_stopping": false,
29
+ "enable_fusion": false,
30
+ "enable_patch_layer_norm": true,
31
+ "encoder_no_repeat_ngram_size": 0,
32
+ "eos_token_id": null,
33
+ "exponential_decay_length_penalty": null,
34
+ "finetuning_task": null,
35
+ "flatten_patch_embeds": true,
36
+ "forced_bos_token_id": null,
37
+ "forced_eos_token_id": null,
38
+ "fusion_type": null,
39
+ "hidden_act": "gelu",
40
+ "hidden_dropout_prob": 0.1,
41
+ "hidden_size": 1024,
42
+ "id2label": {
43
+ "0": "LABEL_0",
44
+ "1": "LABEL_1"
45
+ },
46
+ "initializer_factor": 1.0,
47
+ "is_decoder": false,
48
+ "is_encoder_decoder": false,
49
+ "label2id": {
50
+ "LABEL_0": 0,
51
+ "LABEL_1": 1
52
+ },
53
+ "layer_norm_eps": 1e-05,
54
+ "length_penalty": 1.0,
55
+ "max_length": 20,
56
+ "min_length": 0,
57
+ "mlp_ratio": 4.0,
58
+ "model_type": "clap_audio_model",
59
+ "no_repeat_ngram_size": 0,
60
+ "num_attention_heads": [
61
+ 4,
62
+ 8,
63
+ 16,
64
+ 32
65
+ ],
66
+ "num_beam_groups": 1,
67
+ "num_beams": 1,
68
+ "num_classes": 527,
69
+ "num_hidden_layers": 4,
70
+ "num_mel_bins": 64,
71
+ "num_return_sequences": 1,
72
+ "output_attentions": false,
73
+ "output_hidden_states": false,
74
+ "output_scores": false,
75
+ "pad_token_id": null,
76
+ "patch_embed_input_channels": 1,
77
+ "patch_embeds_hidden_size": 128,
78
+ "patch_size": 4,
79
+ "patch_stride": [
80
+ 4,
81
+ 4
82
+ ],
83
+ "prefix": null,
84
+ "problem_type": null,
85
+ "projection_dim": 512,
86
+ "projection_hidden_act": "relu",
87
+ "pruned_heads": {},
88
+ "qkv_bias": true,
89
+ "remove_invalid_values": false,
90
+ "repetition_penalty": 1.0,
91
+ "return_dict": true,
92
+ "return_dict_in_generate": false,
93
+ "sep_token_id": null,
94
+ "spec_size": 256,
95
+ "suppress_tokens": null,
96
+ "task_specific_params": null,
97
+ "temperature": 1.0,
98
+ "tf_legacy_loss": false,
99
+ "tie_encoder_decoder": false,
100
+ "tie_word_embeddings": true,
101
+ "tokenizer_class": null,
102
+ "top_k": 50,
103
+ "top_p": 1.0,
104
+ "torch_dtype": null,
105
+ "torchscript": false,
106
+ "transformers_version": "4.30.2",
107
+ "typical_p": 1.0,
108
+ "use_bfloat16": false,
109
+ "window_size": 8
110
+ },
111
+ "hidden_size": 768,
112
+ "initializer_factor": 1.0,
113
+ "logit_scale_init_value": 14.285714285714285,
114
+ "model_type": "clap",
115
+ "num_hidden_layers": 16,
116
+ "projection_dim": 512,
117
+ "projection_hidden_act": "relu",
118
+ "text_config": {
119
+ "_name_or_path": "",
120
+ "add_cross_attention": false,
121
+ "architectures": null,
122
+ "attention_probs_dropout_prob": 0.1,
123
+ "bad_words_ids": null,
124
+ "begin_suppress_tokens": null,
125
+ "bos_token_id": 0,
126
+ "chunk_size_feed_forward": 0,
127
+ "classifier_dropout": null,
128
+ "cross_attention_hidden_size": null,
129
+ "decoder_start_token_id": null,
130
+ "diversity_penalty": 0.0,
131
+ "do_sample": false,
132
+ "early_stopping": false,
133
+ "encoder_no_repeat_ngram_size": 0,
134
+ "eos_token_id": 2,
135
+ "exponential_decay_length_penalty": null,
136
+ "finetuning_task": null,
137
+ "forced_bos_token_id": null,
138
+ "forced_eos_token_id": null,
139
+ "hidden_act": "gelu",
140
+ "hidden_dropout_prob": 0.1,
141
+ "hidden_size": 768,
142
+ "id2label": {
143
+ "0": "LABEL_0",
144
+ "1": "LABEL_1"
145
+ },
146
+ "initializer_factor": 1.0,
147
+ "initializer_range": 0.02,
148
+ "intermediate_size": 3072,
149
+ "is_decoder": false,
150
+ "is_encoder_decoder": false,
151
+ "label2id": {
152
+ "LABEL_0": 0,
153
+ "LABEL_1": 1
154
+ },
155
+ "layer_norm_eps": 1e-12,
156
+ "length_penalty": 1.0,
157
+ "max_length": 20,
158
+ "max_position_embeddings": 514,
159
+ "min_length": 0,
160
+ "model_type": "clap_text_model",
161
+ "no_repeat_ngram_size": 0,
162
+ "num_attention_heads": 12,
163
+ "num_beam_groups": 1,
164
+ "num_beams": 1,
165
+ "num_hidden_layers": 12,
166
+ "num_return_sequences": 1,
167
+ "output_attentions": false,
168
+ "output_hidden_states": false,
169
+ "output_scores": false,
170
+ "pad_token_id": 1,
171
+ "position_embedding_type": "absolute",
172
+ "prefix": null,
173
+ "problem_type": null,
174
+ "projection_dim": 512,
175
+ "projection_hidden_act": "relu",
176
+ "pruned_heads": {},
177
+ "remove_invalid_values": false,
178
+ "repetition_penalty": 1.0,
179
+ "return_dict": true,
180
+ "return_dict_in_generate": false,
181
+ "sep_token_id": null,
182
+ "suppress_tokens": null,
183
+ "task_specific_params": null,
184
+ "temperature": 1.0,
185
+ "tf_legacy_loss": false,
186
+ "tie_encoder_decoder": false,
187
+ "tie_word_embeddings": true,
188
+ "tokenizer_class": null,
189
+ "top_k": 50,
190
+ "top_p": 1.0,
191
+ "torch_dtype": null,
192
+ "torchscript": false,
193
+ "transformers_version": "4.30.2",
194
+ "type_vocab_size": 1,
195
+ "typical_p": 1.0,
196
+ "use_bfloat16": false,
197
+ "use_cache": true,
198
+ "vocab_size": 50265
199
+ },
200
+ "torch_dtype": "float32",
201
+ "transformers_version": "4.35.0.dev0"
202
+ }
models/checkpoint-15000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a4ce0b8df5e721a641adc43e3e3a1f36a3bba500c2e02047995ceadadcd19e3
3
+ size 1551623930
models/checkpoint-15000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:916d3f0ea2913ca41fd63c12951925baa669808b35feda5713ab3eb126766df1
3
+ size 776452854
models/checkpoint-15000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f848fabb950660ee3d5e87f0502e308ecdb8d3e50728f9b017301f67b5c1c44f
3
+ size 14244
models/checkpoint-15000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca926e2c4f4c7ad8c406afa62faddf3cf8e566303a56e0d7338bae19c1817c0b
3
+ size 1064
models/checkpoint-15000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
models/checkpoint-15000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4df838ef988efa77991d544c0f2afde996b0f0ca9ef7ccbe236caf5dbb119b48
3
+ size 4344
models/checkpoint-23000/.DS_Store ADDED
Binary file (6.15 kB). View file
 
models/checkpoint-23000/config.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": "a0b4534a14f58e20944452dff00a22a06ce629d1",
3
+ "_name_or_path": "laion/larger_clap_music",
4
+ "architectures": [
5
+ "ClapModel"
6
+ ],
7
+ "audio_config": {
8
+ "_name_or_path": "",
9
+ "add_cross_attention": false,
10
+ "aff_block_r": 4,
11
+ "architectures": null,
12
+ "attention_probs_dropout_prob": 0.0,
13
+ "bad_words_ids": null,
14
+ "begin_suppress_tokens": null,
15
+ "bos_token_id": null,
16
+ "chunk_size_feed_forward": 0,
17
+ "cross_attention_hidden_size": null,
18
+ "decoder_start_token_id": null,
19
+ "depths": [
20
+ 2,
21
+ 2,
22
+ 12,
23
+ 2
24
+ ],
25
+ "diversity_penalty": 0.0,
26
+ "do_sample": false,
27
+ "drop_path_rate": 0.0,
28
+ "early_stopping": false,
29
+ "enable_fusion": false,
30
+ "enable_patch_layer_norm": true,
31
+ "encoder_no_repeat_ngram_size": 0,
32
+ "eos_token_id": null,
33
+ "exponential_decay_length_penalty": null,
34
+ "finetuning_task": null,
35
+ "flatten_patch_embeds": true,
36
+ "forced_bos_token_id": null,
37
+ "forced_eos_token_id": null,
38
+ "fusion_type": null,
39
+ "hidden_act": "gelu",
40
+ "hidden_dropout_prob": 0.1,
41
+ "hidden_size": 1024,
42
+ "id2label": {
43
+ "0": "LABEL_0",
44
+ "1": "LABEL_1"
45
+ },
46
+ "initializer_factor": 1.0,
47
+ "is_decoder": false,
48
+ "is_encoder_decoder": false,
49
+ "label2id": {
50
+ "LABEL_0": 0,
51
+ "LABEL_1": 1
52
+ },
53
+ "layer_norm_eps": 1e-05,
54
+ "length_penalty": 1.0,
55
+ "max_length": 20,
56
+ "min_length": 0,
57
+ "mlp_ratio": 4.0,
58
+ "model_type": "clap_audio_model",
59
+ "no_repeat_ngram_size": 0,
60
+ "num_attention_heads": [
61
+ 4,
62
+ 8,
63
+ 16,
64
+ 32
65
+ ],
66
+ "num_beam_groups": 1,
67
+ "num_beams": 1,
68
+ "num_classes": 527,
69
+ "num_hidden_layers": 4,
70
+ "num_mel_bins": 64,
71
+ "num_return_sequences": 1,
72
+ "output_attentions": false,
73
+ "output_hidden_states": false,
74
+ "output_scores": false,
75
+ "pad_token_id": null,
76
+ "patch_embed_input_channels": 1,
77
+ "patch_embeds_hidden_size": 128,
78
+ "patch_size": 4,
79
+ "patch_stride": [
80
+ 4,
81
+ 4
82
+ ],
83
+ "prefix": null,
84
+ "problem_type": null,
85
+ "projection_dim": 512,
86
+ "projection_hidden_act": "relu",
87
+ "pruned_heads": {},
88
+ "qkv_bias": true,
89
+ "remove_invalid_values": false,
90
+ "repetition_penalty": 1.0,
91
+ "return_dict": true,
92
+ "return_dict_in_generate": false,
93
+ "sep_token_id": null,
94
+ "spec_size": 256,
95
+ "suppress_tokens": null,
96
+ "task_specific_params": null,
97
+ "temperature": 1.0,
98
+ "tf_legacy_loss": false,
99
+ "tie_encoder_decoder": false,
100
+ "tie_word_embeddings": true,
101
+ "tokenizer_class": null,
102
+ "top_k": 50,
103
+ "top_p": 1.0,
104
+ "torch_dtype": null,
105
+ "torchscript": false,
106
+ "transformers_version": "4.30.2",
107
+ "typical_p": 1.0,
108
+ "use_bfloat16": false,
109
+ "window_size": 8
110
+ },
111
+ "hidden_size": 768,
112
+ "initializer_factor": 1.0,
113
+ "logit_scale_init_value": 14.285714285714285,
114
+ "model_type": "clap",
115
+ "num_hidden_layers": 16,
116
+ "projection_dim": 512,
117
+ "projection_hidden_act": "relu",
118
+ "text_config": {
119
+ "_name_or_path": "",
120
+ "add_cross_attention": false,
121
+ "architectures": null,
122
+ "attention_probs_dropout_prob": 0.1,
123
+ "bad_words_ids": null,
124
+ "begin_suppress_tokens": null,
125
+ "bos_token_id": 0,
126
+ "chunk_size_feed_forward": 0,
127
+ "classifier_dropout": null,
128
+ "cross_attention_hidden_size": null,
129
+ "decoder_start_token_id": null,
130
+ "diversity_penalty": 0.0,
131
+ "do_sample": false,
132
+ "early_stopping": false,
133
+ "encoder_no_repeat_ngram_size": 0,
134
+ "eos_token_id": 2,
135
+ "exponential_decay_length_penalty": null,
136
+ "finetuning_task": null,
137
+ "forced_bos_token_id": null,
138
+ "forced_eos_token_id": null,
139
+ "hidden_act": "gelu",
140
+ "hidden_dropout_prob": 0.1,
141
+ "hidden_size": 768,
142
+ "id2label": {
143
+ "0": "LABEL_0",
144
+ "1": "LABEL_1"
145
+ },
146
+ "initializer_factor": 1.0,
147
+ "initializer_range": 0.02,
148
+ "intermediate_size": 3072,
149
+ "is_decoder": false,
150
+ "is_encoder_decoder": false,
151
+ "label2id": {
152
+ "LABEL_0": 0,
153
+ "LABEL_1": 1
154
+ },
155
+ "layer_norm_eps": 1e-12,
156
+ "length_penalty": 1.0,
157
+ "max_length": 20,
158
+ "max_position_embeddings": 514,
159
+ "min_length": 0,
160
+ "model_type": "clap_text_model",
161
+ "no_repeat_ngram_size": 0,
162
+ "num_attention_heads": 12,
163
+ "num_beam_groups": 1,
164
+ "num_beams": 1,
165
+ "num_hidden_layers": 12,
166
+ "num_return_sequences": 1,
167
+ "output_attentions": false,
168
+ "output_hidden_states": false,
169
+ "output_scores": false,
170
+ "pad_token_id": 1,
171
+ "position_embedding_type": "absolute",
172
+ "prefix": null,
173
+ "problem_type": null,
174
+ "projection_dim": 512,
175
+ "projection_hidden_act": "relu",
176
+ "pruned_heads": {},
177
+ "remove_invalid_values": false,
178
+ "repetition_penalty": 1.0,
179
+ "return_dict": true,
180
+ "return_dict_in_generate": false,
181
+ "sep_token_id": null,
182
+ "suppress_tokens": null,
183
+ "task_specific_params": null,
184
+ "temperature": 1.0,
185
+ "tf_legacy_loss": false,
186
+ "tie_encoder_decoder": false,
187
+ "tie_word_embeddings": true,
188
+ "tokenizer_class": null,
189
+ "top_k": 50,
190
+ "top_p": 1.0,
191
+ "torch_dtype": null,
192
+ "torchscript": false,
193
+ "transformers_version": "4.30.2",
194
+ "type_vocab_size": 1,
195
+ "typical_p": 1.0,
196
+ "use_bfloat16": false,
197
+ "use_cache": true,
198
+ "vocab_size": 50265
199
+ },
200
+ "torch_dtype": "float32",
201
+ "transformers_version": "4.35.0.dev0"
202
+ }
models/checkpoint-23000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19a52979e49434aac9f438b7a3ca0e25438b072006e5a9c80e58217bd6608de3
3
+ size 1551623930
models/checkpoint-23000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:074cefe525a058d5f4fb17fdf182c9a8b5675bf5e06c57b0184cf25f2a4e7301
3
+ size 776452854
models/checkpoint-23000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc66b1baf9dd7c42045b3d204884e2ad4f05ce7c4af00774dd91a4d392c85e3c
3
+ size 14244
models/checkpoint-23000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b51b5c9976c05919548fcfe5f9d95afe9ea06b109ce6cfd032e55729c50a2d96
3
+ size 1064
models/checkpoint-23000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
models/checkpoint-23000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4df838ef988efa77991d544c0f2afde996b0f0ca9ef7ccbe236caf5dbb119b48
3
+ size 4344
models/checkpoint-500/.DS_Store ADDED
Binary file (6.15 kB). View file
 
models/checkpoint-500/config.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": "a0b4534a14f58e20944452dff00a22a06ce629d1",
3
+ "_name_or_path": "laion/larger_clap_music",
4
+ "architectures": [
5
+ "ClapModel"
6
+ ],
7
+ "audio_config": {
8
+ "_name_or_path": "",
9
+ "add_cross_attention": false,
10
+ "aff_block_r": 4,
11
+ "architectures": null,
12
+ "attention_probs_dropout_prob": 0.0,
13
+ "bad_words_ids": null,
14
+ "begin_suppress_tokens": null,
15
+ "bos_token_id": null,
16
+ "chunk_size_feed_forward": 0,
17
+ "cross_attention_hidden_size": null,
18
+ "decoder_start_token_id": null,
19
+ "depths": [
20
+ 2,
21
+ 2,
22
+ 12,
23
+ 2
24
+ ],
25
+ "diversity_penalty": 0.0,
26
+ "do_sample": false,
27
+ "drop_path_rate": 0.0,
28
+ "early_stopping": false,
29
+ "enable_fusion": false,
30
+ "enable_patch_layer_norm": true,
31
+ "encoder_no_repeat_ngram_size": 0,
32
+ "eos_token_id": null,
33
+ "exponential_decay_length_penalty": null,
34
+ "finetuning_task": null,
35
+ "flatten_patch_embeds": true,
36
+ "forced_bos_token_id": null,
37
+ "forced_eos_token_id": null,
38
+ "fusion_type": null,
39
+ "hidden_act": "gelu",
40
+ "hidden_dropout_prob": 0.1,
41
+ "hidden_size": 1024,
42
+ "id2label": {
43
+ "0": "LABEL_0",
44
+ "1": "LABEL_1"
45
+ },
46
+ "initializer_factor": 1.0,
47
+ "is_decoder": false,
48
+ "is_encoder_decoder": false,
49
+ "label2id": {
50
+ "LABEL_0": 0,
51
+ "LABEL_1": 1
52
+ },
53
+ "layer_norm_eps": 1e-05,
54
+ "length_penalty": 1.0,
55
+ "max_length": 20,
56
+ "min_length": 0,
57
+ "mlp_ratio": 4.0,
58
+ "model_type": "clap_audio_model",
59
+ "no_repeat_ngram_size": 0,
60
+ "num_attention_heads": [
61
+ 4,
62
+ 8,
63
+ 16,
64
+ 32
65
+ ],
66
+ "num_beam_groups": 1,
67
+ "num_beams": 1,
68
+ "num_classes": 527,
69
+ "num_hidden_layers": 4,
70
+ "num_mel_bins": 64,
71
+ "num_return_sequences": 1,
72
+ "output_attentions": false,
73
+ "output_hidden_states": false,
74
+ "output_scores": false,
75
+ "pad_token_id": null,
76
+ "patch_embed_input_channels": 1,
77
+ "patch_embeds_hidden_size": 128,
78
+ "patch_size": 4,
79
+ "patch_stride": [
80
+ 4,
81
+ 4
82
+ ],
83
+ "prefix": null,
84
+ "problem_type": null,
85
+ "projection_dim": 512,
86
+ "projection_hidden_act": "relu",
87
+ "pruned_heads": {},
88
+ "qkv_bias": true,
89
+ "remove_invalid_values": false,
90
+ "repetition_penalty": 1.0,
91
+ "return_dict": true,
92
+ "return_dict_in_generate": false,
93
+ "sep_token_id": null,
94
+ "spec_size": 256,
95
+ "suppress_tokens": null,
96
+ "task_specific_params": null,
97
+ "temperature": 1.0,
98
+ "tf_legacy_loss": false,
99
+ "tie_encoder_decoder": false,
100
+ "tie_word_embeddings": true,
101
+ "tokenizer_class": null,
102
+ "top_k": 50,
103
+ "top_p": 1.0,
104
+ "torch_dtype": null,
105
+ "torchscript": false,
106
+ "transformers_version": "4.30.2",
107
+ "typical_p": 1.0,
108
+ "use_bfloat16": false,
109
+ "window_size": 8
110
+ },
111
+ "hidden_size": 768,
112
+ "initializer_factor": 1.0,
113
+ "logit_scale_init_value": 14.285714285714285,
114
+ "model_type": "clap",
115
+ "num_hidden_layers": 16,
116
+ "projection_dim": 512,
117
+ "projection_hidden_act": "relu",
118
+ "text_config": {
119
+ "_name_or_path": "",
120
+ "add_cross_attention": false,
121
+ "architectures": null,
122
+ "attention_probs_dropout_prob": 0.1,
123
+ "bad_words_ids": null,
124
+ "begin_suppress_tokens": null,
125
+ "bos_token_id": 0,
126
+ "chunk_size_feed_forward": 0,
127
+ "classifier_dropout": null,
128
+ "cross_attention_hidden_size": null,
129
+ "decoder_start_token_id": null,
130
+ "diversity_penalty": 0.0,
131
+ "do_sample": false,
132
+ "early_stopping": false,
133
+ "encoder_no_repeat_ngram_size": 0,
134
+ "eos_token_id": 2,
135
+ "exponential_decay_length_penalty": null,
136
+ "finetuning_task": null,
137
+ "forced_bos_token_id": null,
138
+ "forced_eos_token_id": null,
139
+ "hidden_act": "gelu",
140
+ "hidden_dropout_prob": 0.1,
141
+ "hidden_size": 768,
142
+ "id2label": {
143
+ "0": "LABEL_0",
144
+ "1": "LABEL_1"
145
+ },
146
+ "initializer_factor": 1.0,
147
+ "initializer_range": 0.02,
148
+ "intermediate_size": 3072,
149
+ "is_decoder": false,
150
+ "is_encoder_decoder": false,
151
+ "label2id": {
152
+ "LABEL_0": 0,
153
+ "LABEL_1": 1
154
+ },
155
+ "layer_norm_eps": 1e-12,
156
+ "length_penalty": 1.0,
157
+ "max_length": 20,
158
+ "max_position_embeddings": 514,
159
+ "min_length": 0,
160
+ "model_type": "clap_text_model",
161
+ "no_repeat_ngram_size": 0,
162
+ "num_attention_heads": 12,
163
+ "num_beam_groups": 1,
164
+ "num_beams": 1,
165
+ "num_hidden_layers": 12,
166
+ "num_return_sequences": 1,
167
+ "output_attentions": false,
168
+ "output_hidden_states": false,
169
+ "output_scores": false,
170
+ "pad_token_id": 1,
171
+ "position_embedding_type": "absolute",
172
+ "prefix": null,
173
+ "problem_type": null,
174
+ "projection_dim": 512,
175
+ "projection_hidden_act": "relu",
176
+ "pruned_heads": {},
177
+ "remove_invalid_values": false,
178
+ "repetition_penalty": 1.0,
179
+ "return_dict": true,
180
+ "return_dict_in_generate": false,
181
+ "sep_token_id": null,
182
+ "suppress_tokens": null,
183
+ "task_specific_params": null,
184
+ "temperature": 1.0,
185
+ "tf_legacy_loss": false,
186
+ "tie_encoder_decoder": false,
187
+ "tie_word_embeddings": true,
188
+ "tokenizer_class": null,
189
+ "top_k": 50,
190
+ "top_p": 1.0,
191
+ "torch_dtype": null,
192
+ "torchscript": false,
193
+ "transformers_version": "4.30.2",
194
+ "type_vocab_size": 1,
195
+ "typical_p": 1.0,
196
+ "use_bfloat16": false,
197
+ "use_cache": true,
198
+ "vocab_size": 50265
199
+ },
200
+ "torch_dtype": "float32",
201
+ "transformers_version": "4.35.0.dev0"
202
+ }
models/checkpoint-500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f65b89ff9341fe02f2c83e381f83f1599d90c2e0c037aea39894cdbacfcc5678
3
+ size 776452854
models/checkpoint-500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0145eb2bfb043a6b54a9c6889cb8adfa1128d6a25befd3bfc130e9e393a9ffde
3
+ size 14244
models/checkpoint-500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7acd664636e5a7597958f170516014abdca4dc3c8b6440d5f6081ff7d62fb1cd
3
+ size 1064
models/checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9861932938856016,
5
+ "global_step": 500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.04,
12
+ "learning_rate": 3.944773175542407e-08,
13
+ "loss": 4.1595,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.08,
18
+ "learning_rate": 7.889546351084814e-08,
19
+ "loss": 4.1591,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 0.12,
24
+ "learning_rate": 1.183431952662722e-07,
25
+ "loss": 4.159,
26
+ "step": 60
27
+ },
28
+ {
29
+ "epoch": 0.16,
30
+ "learning_rate": 1.5779092702169629e-07,
31
+ "loss": 4.1588,
32
+ "step": 80
33
+ },
34
+ {
35
+ "epoch": 0.2,
36
+ "learning_rate": 1.9723865877712034e-07,
37
+ "loss": 4.1589,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.24,
42
+ "learning_rate": 2.366863905325444e-07,
43
+ "loss": 4.1591,
44
+ "step": 120
45
+ },
46
+ {
47
+ "epoch": 0.28,
48
+ "learning_rate": 2.7613412228796843e-07,
49
+ "loss": 4.1591,
50
+ "step": 140
51
+ },
52
+ {
53
+ "epoch": 0.32,
54
+ "learning_rate": 3.1558185404339257e-07,
55
+ "loss": 4.159,
56
+ "step": 160
57
+ },
58
+ {
59
+ "epoch": 0.36,
60
+ "learning_rate": 3.550295857988166e-07,
61
+ "loss": 4.1589,
62
+ "step": 180
63
+ },
64
+ {
65
+ "epoch": 0.39,
66
+ "learning_rate": 3.944773175542407e-07,
67
+ "loss": 4.159,
68
+ "step": 200
69
+ },
70
+ {
71
+ "epoch": 0.43,
72
+ "learning_rate": 4.339250493096647e-07,
73
+ "loss": 4.1591,
74
+ "step": 220
75
+ },
76
+ {
77
+ "epoch": 0.47,
78
+ "learning_rate": 4.733727810650888e-07,
79
+ "loss": 4.1591,
80
+ "step": 240
81
+ },
82
+ {
83
+ "epoch": 0.51,
84
+ "learning_rate": 5.128205128205128e-07,
85
+ "loss": 4.1589,
86
+ "step": 260
87
+ },
88
+ {
89
+ "epoch": 0.55,
90
+ "learning_rate": 5.522682445759369e-07,
91
+ "loss": 4.1589,
92
+ "step": 280
93
+ },
94
+ {
95
+ "epoch": 0.59,
96
+ "learning_rate": 5.91715976331361e-07,
97
+ "loss": 4.1591,
98
+ "step": 300
99
+ },
100
+ {
101
+ "epoch": 0.63,
102
+ "learning_rate": 6.311637080867851e-07,
103
+ "loss": 4.1588,
104
+ "step": 320
105
+ },
106
+ {
107
+ "epoch": 0.67,
108
+ "learning_rate": 6.706114398422091e-07,
109
+ "loss": 4.1589,
110
+ "step": 340
111
+ },
112
+ {
113
+ "epoch": 0.71,
114
+ "learning_rate": 7.100591715976332e-07,
115
+ "loss": 4.1589,
116
+ "step": 360
117
+ },
118
+ {
119
+ "epoch": 0.75,
120
+ "learning_rate": 7.495069033530572e-07,
121
+ "loss": 4.1587,
122
+ "step": 380
123
+ },
124
+ {
125
+ "epoch": 0.79,
126
+ "learning_rate": 7.889546351084814e-07,
127
+ "loss": 4.1587,
128
+ "step": 400
129
+ },
130
+ {
131
+ "epoch": 0.83,
132
+ "learning_rate": 8.284023668639055e-07,
133
+ "loss": 4.159,
134
+ "step": 420
135
+ },
136
+ {
137
+ "epoch": 0.87,
138
+ "learning_rate": 8.678500986193294e-07,
139
+ "loss": 4.1588,
140
+ "step": 440
141
+ },
142
+ {
143
+ "epoch": 0.91,
144
+ "learning_rate": 9.072978303747536e-07,
145
+ "loss": 4.1589,
146
+ "step": 460
147
+ },
148
+ {
149
+ "epoch": 0.95,
150
+ "learning_rate": 9.467455621301776e-07,
151
+ "loss": 4.1589,
152
+ "step": 480
153
+ },
154
+ {
155
+ "epoch": 0.99,
156
+ "learning_rate": 9.861932938856016e-07,
157
+ "loss": 4.1588,
158
+ "step": 500
159
+ },
160
+ {
161
+ "epoch": 0.99,
162
+ "eval_loss": 4.158804893493652,
163
+ "eval_runtime": 67.7811,
164
+ "eval_samples_per_second": 88.638,
165
+ "eval_steps_per_second": 0.177,
166
+ "step": 500
167
+ }
168
+ ],
169
+ "max_steps": 50700,
170
+ "num_train_epochs": 100,
171
+ "total_flos": 1.21829989023744e+17,
172
+ "trial_name": null,
173
+ "trial_params": null
174
+ }
models/checkpoint-500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4df838ef988efa77991d544c0f2afde996b0f0ca9ef7ccbe236caf5dbb119b48
3
+ size 4344