pavanmantha committed on
Commit
69fec75
·
verified ·
1 Parent(s): 3b78ca2

Add fine-tuned Whisper Medium Sanskrit model

Browse files
Files changed (3) hide show
  1. config.json +25 -12
  2. generation_config.json +109 -110
  3. model.safetensors +2 -2
config.json CHANGED
@@ -8,20 +8,33 @@
8
  "attention_dropout": 0.0,
9
  "bos_token_id": 50257,
10
  "classifier_proj_size": 256,
11
- "d_model": 1280,
12
- "decoder_attention_heads": 20,
13
- "decoder_ffn_dim": 5120,
14
  "decoder_layerdrop": 0.0,
15
- "decoder_layers": 32,
16
  "decoder_start_token_id": 50258,
17
  "dropout": 0.0,
18
- "dtype": "bfloat16",
19
- "encoder_attention_heads": 20,
20
- "encoder_ffn_dim": 5120,
21
  "encoder_layerdrop": 0.0,
22
- "encoder_layers": 32,
23
  "eos_token_id": 50257,
24
- "forced_decoder_ids": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "init_std": 0.02,
26
  "is_encoder_decoder": true,
27
  "mask_feature_length": 10,
@@ -34,13 +47,13 @@
34
  "max_target_positions": 448,
35
  "median_filter_width": 7,
36
  "model_type": "whisper",
37
- "num_hidden_layers": 32,
38
  "num_mel_bins": 80,
39
  "pad_token_id": 50257,
40
  "scale_embedding": false,
41
  "tie_word_embeddings": true,
42
- "transformers_version": "5.3.0",
43
- "use_cache": false,
44
  "use_weighted_layer_sum": false,
45
  "vocab_size": 51865
46
  }
 
8
  "attention_dropout": 0.0,
9
  "bos_token_id": 50257,
10
  "classifier_proj_size": 256,
11
+ "d_model": 1024,
12
+ "decoder_attention_heads": 16,
13
+ "decoder_ffn_dim": 4096,
14
  "decoder_layerdrop": 0.0,
15
+ "decoder_layers": 24,
16
  "decoder_start_token_id": 50258,
17
  "dropout": 0.0,
18
+ "dtype": "float32",
19
+ "encoder_attention_heads": 16,
20
+ "encoder_ffn_dim": 4096,
21
  "encoder_layerdrop": 0.0,
22
+ "encoder_layers": 24,
23
  "eos_token_id": 50257,
24
+ "forced_decoder_ids": [
25
+ [
26
+ 1,
27
+ 50259
28
+ ],
29
+ [
30
+ 2,
31
+ 50359
32
+ ],
33
+ [
34
+ 3,
35
+ 50363
36
+ ]
37
+ ],
38
  "init_std": 0.02,
39
  "is_encoder_decoder": true,
40
  "mask_feature_length": 10,
 
47
  "max_target_positions": 448,
48
  "median_filter_width": 7,
49
  "model_type": "whisper",
50
+ "num_hidden_layers": 24,
51
  "num_mel_bins": 80,
52
  "pad_token_id": 50257,
53
  "scale_embedding": false,
54
  "tie_word_embeddings": true,
55
+ "transformers_version": "5.2.0",
56
+ "use_cache": true,
57
  "use_weighted_layer_sum": false,
58
  "vocab_size": 51865
59
  }
generation_config.json CHANGED
@@ -1,117 +1,47 @@
1
  {
2
  "alignment_heads": [
3
- [
4
- 10,
5
- 12
6
- ],
7
  [
8
  13,
9
- 17
10
- ],
11
- [
12
- 16,
13
- 11
14
- ],
15
- [
16
- 16,
17
- 12
18
- ],
19
- [
20
- 16,
21
- 13
22
- ],
23
- [
24
- 17,
25
  15
26
  ],
27
  [
28
- 17,
29
- 16
30
- ],
31
- [
32
- 18,
33
  4
34
  ],
35
  [
36
- 18,
37
- 11
38
- ],
39
- [
40
- 18,
41
- 19
42
- ],
43
- [
44
- 19,
45
- 11
46
- ],
47
- [
48
- 21,
49
- 2
50
- ],
51
- [
52
- 21,
53
- 3
54
- ],
55
- [
56
- 22,
57
- 3
58
- ],
59
- [
60
- 22,
61
- 9
62
- ],
63
- [
64
- 22,
65
- 12
66
- ],
67
- [
68
- 23,
69
- 5
70
- ],
71
- [
72
- 23,
73
- 7
74
- ],
75
- [
76
- 23,
77
- 13
78
- ],
79
- [
80
- 25,
81
- 5
82
  ],
83
  [
84
- 26,
85
  1
86
  ],
87
  [
88
- 26,
89
- 12
90
  ],
91
  [
92
- 27,
93
- 15
94
  ]
95
  ],
96
- "assistant_confidence_threshold": 0.4,
97
- "assistant_lookbehind": 10,
98
  "begin_suppress_tokens": [
99
  220,
100
  50257
101
  ],
102
  "bos_token_id": 50257,
103
  "decoder_start_token_id": 50258,
104
- "diversity_penalty": 0.0,
105
- "do_sample": false,
106
- "early_stopping": false,
107
- "encoder_no_repeat_ngram_size": 0,
108
- "encoder_repetition_penalty": 1.0,
109
- "eos_token_id": [
110
- 50257
 
 
 
111
  ],
112
- "epsilon_cutoff": 0.0,
113
- "eta_cutoff": 0.0,
114
- "forced_decoder_ids": null,
115
  "is_multilingual": true,
116
  "lang_to_id": {
117
  "<|af|>": 50327,
@@ -214,36 +144,105 @@
214
  "<|yo|>": 50325,
215
  "<|zh|>": 50260
216
  },
217
- "language": "hindi",
218
- "length_penalty": 1.0,
219
  "max_initial_timestamp_index": 50,
220
  "max_length": 448,
221
- "min_length": 0,
222
- "no_repeat_ngram_size": 0,
223
  "no_timestamps_token_id": 50363,
224
- "num_assistant_tokens": 20,
225
- "num_assistant_tokens_schedule": "constant",
226
- "num_beam_groups": 1,
227
- "num_beams": 1,
228
- "num_return_sequences": 1,
229
- "output_scores": false,
230
  "pad_token_id": 50257,
231
  "prev_sot_token_id": 50361,
232
- "remove_invalid_values": false,
233
- "repetition_penalty": 1.0,
234
- "return_dict_in_generate": false,
235
  "return_timestamps": false,
236
- "suppress_tokens": [],
237
- "target_lookbehind": 10,
238
- "task": "transcribe",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  "task_to_id": {
240
  "transcribe": 50359,
241
  "translate": 50358
242
  },
243
- "temperature": 1.0,
244
- "top_k": 50,
245
- "top_p": 1.0,
246
- "transformers_version": "5.3.0",
247
- "typical_p": 1.0,
248
- "use_cache": true
249
  }
 
1
  {
2
  "alignment_heads": [
 
 
 
 
3
  [
4
  13,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  15
6
  ],
7
  [
8
+ 15,
 
 
 
 
9
  4
10
  ],
11
  [
12
+ 15,
13
+ 15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ],
15
  [
16
+ 16,
17
  1
18
  ],
19
  [
20
+ 20,
21
+ 0
22
  ],
23
  [
24
+ 23,
25
+ 4
26
  ]
27
  ],
 
 
28
  "begin_suppress_tokens": [
29
  220,
30
  50257
31
  ],
32
  "bos_token_id": 50257,
33
  "decoder_start_token_id": 50258,
34
+ "eos_token_id": 50257,
35
+ "forced_decoder_ids": [
36
+ [
37
+ 1,
38
+ null
39
+ ],
40
+ [
41
+ 2,
42
+ 50359
43
+ ]
44
  ],
 
 
 
45
  "is_multilingual": true,
46
  "lang_to_id": {
47
  "<|af|>": 50327,
 
144
  "<|yo|>": 50325,
145
  "<|zh|>": 50260
146
  },
 
 
147
  "max_initial_timestamp_index": 50,
148
  "max_length": 448,
 
 
149
  "no_timestamps_token_id": 50363,
 
 
 
 
 
 
150
  "pad_token_id": 50257,
151
  "prev_sot_token_id": 50361,
 
 
 
152
  "return_timestamps": false,
153
+ "suppress_tokens": [
154
+ 1,
155
+ 2,
156
+ 7,
157
+ 8,
158
+ 9,
159
+ 10,
160
+ 14,
161
+ 25,
162
+ 26,
163
+ 27,
164
+ 28,
165
+ 29,
166
+ 31,
167
+ 58,
168
+ 59,
169
+ 60,
170
+ 61,
171
+ 62,
172
+ 63,
173
+ 90,
174
+ 91,
175
+ 92,
176
+ 93,
177
+ 359,
178
+ 503,
179
+ 522,
180
+ 542,
181
+ 873,
182
+ 893,
183
+ 902,
184
+ 918,
185
+ 922,
186
+ 931,
187
+ 1350,
188
+ 1853,
189
+ 1982,
190
+ 2460,
191
+ 2627,
192
+ 3246,
193
+ 3253,
194
+ 3268,
195
+ 3536,
196
+ 3846,
197
+ 3961,
198
+ 4183,
199
+ 4667,
200
+ 6585,
201
+ 6647,
202
+ 7273,
203
+ 9061,
204
+ 9383,
205
+ 10428,
206
+ 10929,
207
+ 11938,
208
+ 12033,
209
+ 12331,
210
+ 12562,
211
+ 13793,
212
+ 14157,
213
+ 14635,
214
+ 15265,
215
+ 15618,
216
+ 16553,
217
+ 16604,
218
+ 18362,
219
+ 18956,
220
+ 20075,
221
+ 21675,
222
+ 22520,
223
+ 26130,
224
+ 26161,
225
+ 26435,
226
+ 28279,
227
+ 29464,
228
+ 31650,
229
+ 32302,
230
+ 32470,
231
+ 36865,
232
+ 42863,
233
+ 47425,
234
+ 49870,
235
+ 50254,
236
+ 50258,
237
+ 50358,
238
+ 50359,
239
+ 50360,
240
+ 50361,
241
+ 50362
242
+ ],
243
  "task_to_id": {
244
  "transcribe": 50359,
245
  "translate": 50358
246
  },
247
+ "transformers_version": "5.2.0"
 
 
 
 
 
248
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa5b61f7df3b30ab84d1bcfcebda5a82edd35a1dcd5e965ffe42fcb08d63bc01
3
- size 3086761032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62f73550fa6db24b0c6f6c5962bd0dae80fa644e93cde9cd9c3792971b47fd28
3
+ size 3055544304