JaratX committed on
Commit
5d9cbf3
·
verified ·
1 Parent(s): 4028672

Delete transcription

Browse files
transcription/w2v-bert-darija-finetuned-clean/added_tokens.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "</s>": 57,
3
- "<s>": 56
4
- }
 
 
 
 
 
transcription/w2v-bert-darija-finetuned-clean/config.json DELETED
@@ -1,82 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/w2v-bert-2.0",
3
- "activation_dropout": 0.0,
4
- "adapter_act": "relu",
5
- "adapter_kernel_size": 3,
6
- "adapter_stride": 2,
7
- "add_adapter": true,
8
- "apply_spec_augment": false,
9
- "architectures": [
10
- "Wav2Vec2BertForCTC"
11
- ],
12
- "attention_dropout": 0.0,
13
- "bos_token_id": 1,
14
- "classifier_proj_size": 768,
15
- "codevector_dim": 768,
16
- "conformer_conv_dropout": 0.1,
17
- "contrastive_logits_temperature": 0.1,
18
- "conv_depthwise_kernel_size": 31,
19
- "ctc_loss_reduction": "mean",
20
- "ctc_zero_infinity": false,
21
- "diversity_loss_weight": 0.1,
22
- "eos_token_id": 2,
23
- "feat_proj_dropout": 0.0,
24
- "feat_quantizer_dropout": 0.0,
25
- "feature_projection_input_dim": 160,
26
- "final_dropout": 0.1,
27
- "hidden_act": "swish",
28
- "hidden_dropout": 0.0,
29
- "hidden_size": 1024,
30
- "initializer_range": 0.02,
31
- "intermediate_size": 4096,
32
- "layer_norm_eps": 1e-05,
33
- "layerdrop": 0.0,
34
- "left_max_position_embeddings": 64,
35
- "mask_feature_length": 10,
36
- "mask_feature_min_masks": 0,
37
- "mask_feature_prob": 0.0,
38
- "mask_time_length": 10,
39
- "mask_time_min_masks": 2,
40
- "mask_time_prob": 0.0,
41
- "max_source_positions": 5000,
42
- "model_type": "wav2vec2-bert",
43
- "num_adapter_layers": 1,
44
- "num_attention_heads": 16,
45
- "num_codevector_groups": 2,
46
- "num_codevectors_per_group": 320,
47
- "num_hidden_layers": 24,
48
- "num_negatives": 100,
49
- "output_hidden_size": 1024,
50
- "pad_token_id": 55,
51
- "position_embeddings_type": "relative_key",
52
- "proj_codevector_dim": 768,
53
- "right_max_position_embeddings": 8,
54
- "rotary_embedding_base": 10000,
55
- "tdnn_dilation": [
56
- 1,
57
- 2,
58
- 3,
59
- 1,
60
- 1
61
- ],
62
- "tdnn_dim": [
63
- 512,
64
- 512,
65
- 512,
66
- 512,
67
- 1500
68
- ],
69
- "tdnn_kernel": [
70
- 5,
71
- 3,
72
- 3,
73
- 1,
74
- 1
75
- ],
76
- "torch_dtype": "float32",
77
- "transformers_version": "4.46.3",
78
- "use_intermediate_ffn_before_adapter": false,
79
- "use_weighted_layer_sum": false,
80
- "vocab_size": 58,
81
- "xvector_output_dim": 512
82
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
transcription/w2v-bert-darija-finetuned-clean/preprocessor_config.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "feature_extractor_type": "SeamlessM4TFeatureExtractor",
3
- "feature_size": 80,
4
- "num_mel_bins": 80,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "processor_class": "Wav2Vec2BertProcessor",
8
- "return_attention_mask": true,
9
- "sampling_rate": 16000,
10
- "stride": 2
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
transcription/w2v-bert-darija-finetuned-clean/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
6
- }
 
 
 
 
 
 
 
transcription/w2v-bert-darija-finetuned-clean/tokenizer_config.json DELETED
@@ -1,48 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "54": {
4
- "content": "[UNK]",
5
- "lstrip": true,
6
- "normalized": false,
7
- "rstrip": true,
8
- "single_word": false,
9
- "special": false
10
- },
11
- "55": {
12
- "content": "[PAD]",
13
- "lstrip": true,
14
- "normalized": false,
15
- "rstrip": true,
16
- "single_word": false,
17
- "special": false
18
- },
19
- "56": {
20
- "content": "<s>",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "57": {
28
- "content": "</s>",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- }
35
- },
36
- "bos_token": "<s>",
37
- "clean_up_tokenization_spaces": false,
38
- "do_lower_case": false,
39
- "eos_token": "</s>",
40
- "model_max_length": 1000000000000000019884624838656,
41
- "pad_token": "[PAD]",
42
- "processor_class": "Wav2Vec2BertProcessor",
43
- "replace_word_delimiter_char": " ",
44
- "target_lang": null,
45
- "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
- "unk_token": "[UNK]",
47
- "word_delimiter_token": "|"
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
transcription/w2v-bert-darija-finetuned-clean/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2793aaf5af1a42cb015bfef656c097f2acac87a48fffa9d3175eaeceb802e54
3
- size 5304
 
 
 
 
transcription/w2v-bert-darija-finetuned-clean/vocab.json DELETED
@@ -1,58 +0,0 @@
1
- {
2
- "\t": 0,
3
- "\n": 1,
4
- "!": 3,
5
- "(": 4,
6
- ")": 5,
7
- ",": 6,
8
- ".": 7,
9
- "?": 8,
10
- "[PAD]": 55,
11
- "[UNK]": 54,
12
- "|": 2,
13
- "،": 9,
14
- "؟": 10,
15
- "ء": 11,
16
- "آ": 12,
17
- "أ": 13,
18
- "ؤ": 14,
19
- "إ": 15,
20
- "ئ": 16,
21
- "ا": 17,
22
- "ب": 18,
23
- "ة": 19,
24
- "ت": 20,
25
- "ث": 21,
26
- "ج": 22,
27
- "ح": 23,
28
- "خ": 24,
29
- "د": 25,
30
- "ذ": 26,
31
- "ر": 27,
32
- "ز": 28,
33
- "س": 29,
34
- "ش": 30,
35
- "ص": 31,
36
- "ض": 32,
37
- "ط": 33,
38
- "ظ": 34,
39
- "ع": 35,
40
- "غ": 36,
41
- "ف": 37,
42
- "ق": 38,
43
- "ك": 39,
44
- "ل": 40,
45
- "م": 41,
46
- "ن": 42,
47
- "ه": 43,
48
- "و": 44,
49
- "ى": 45,
50
- "ي": 46,
51
- "ً": 47,
52
- "ٍ": 48,
53
- "ّ": 49,
54
- "ٱ": 50,
55
- "پ": 51,
56
- "ڤ": 52,
57
- "ڭ": 53
58
- }