Akaash1 committed on
Commit
d866b9e
·
verified ·
1 Parent(s): c876383

End of training

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: transformers
3
- license: cc-by-nc-4.0
4
- base_model: facebook/mms-1b-all
5
  tags:
6
  - generated_from_trainer
7
  metrics:
@@ -16,10 +16,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # training
18
 
19
- This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unspecified dataset (the dataset name was not recorded by the trainer).
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.0874
22
- - Wer: 0.8986
23
 
24
  ## Model description
25
 
@@ -38,39 +38,34 @@ More information needed
38
  ### Training hyperparameters
39
 
40
  The following hyperparameters were used during training:
41
- - learning_rate: 0.0001
42
  - train_batch_size: 4
43
- - eval_batch_size: 8
44
  - seed: 42
 
 
45
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_steps: 500
48
- - num_epochs: 10
49
  - mixed_precision_training: Native AMP
50
 
51
  ### Training results
52
 
53
- | Training Loss | Epoch | Step | Validation Loss | Wer |
54
- |:-------------:|:------:|:----:|:---------------:|:------:|
55
- | 5.9632 | 0.4926 | 500 | 1.0587 | 0.7186 |
56
- | 1.2221 | 0.9852 | 1000 | 0.9062 | 0.6633 |
57
- | 1.0028 | 1.4778 | 1500 | 0.8629 | 0.6419 |
58
- | 1.1607 | 1.9704 | 2000 | 0.8534 | 0.6337 |
59
- | 1.0608 | 2.4631 | 2500 | 1.0133 | 0.7632 |
60
- | 1.3117 | 2.9557 | 3000 | 1.0389 | 0.7365 |
61
- | 1.3259 | 3.4483 | 3500 | 1.0874 | 0.8991 |
62
- | 1.2260 | 3.9409 | 4000 | 1.0875 | 0.8983 |
63
- | 1.3359 | 4.4335 | 4500 | 1.0875 | 0.8989 |
64
- | 1.2309 | 4.9261 | 5000 | 1.0875 | 0.8979 |
65
- | 1.2623 | 5.4187 | 5500 | 1.0875 | 0.8985 |
66
- | 1.2284 | 5.9113 | 6000 | 1.0875 | 0.8982 |
67
- | 1.3132 | 6.4039 | 6500 | 1.0875 | 0.8985 |
68
- | 1.3229 | 6.8966 | 7000 | 1.0874 | 0.8986 |
69
 
70
 
71
  ### Framework versions
72
 
73
- - Transformers 5.0.0.dev0
74
- - Pytorch 2.5.1+cu121
75
  - Datasets 3.6.0
76
  - Tokenizers 0.22.1
 
1
  ---
2
  library_name: transformers
3
+ license: apache-2.0
4
+ base_model: facebook/wav2vec2-xls-r-300m
5
  tags:
6
  - generated_from_trainer
7
  metrics:
 
16
 
17
  # training
18
 
19
+ This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on an unspecified dataset (the dataset name was not recorded by the trainer).
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 2.0763
22
+ - Wer: 0.9551
23
 
24
  ## Model description
25
 
 
38
  ### Training hyperparameters
39
 
40
  The following hyperparameters were used during training:
41
+ - learning_rate: 0.0003
42
  - train_batch_size: 4
43
+ - eval_batch_size: 4
44
  - seed: 42
45
+ - gradient_accumulation_steps: 2
46
+ - total_train_batch_size: 8
47
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
48
  - lr_scheduler_type: linear
49
  - lr_scheduler_warmup_steps: 500
50
+ - num_epochs: 25
51
  - mixed_precision_training: Native AMP
52
 
53
  ### Training results
54
 
55
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
56
+ |:-------------:|:-------:|:----:|:---------------:|:------:|
57
+ | 3.6154 | 3.3557 | 500 | 3.6223 | 1.0 |
58
+ | 3.6729 | 6.7114 | 1000 | 3.5869 | 1.0 |
59
+ | 3.6718 | 10.0671 | 1500 | 3.5824 | 1.0 |
60
+ | 3.7365 | 13.4228 | 2000 | 3.5829 | 1.0 |
61
+ | 3.5897 | 16.7785 | 2500 | 3.5750 | 1.0 |
62
+ | 3.5689 | 20.1342 | 3000 | 3.2325 | 1.0 |
63
+ | 2.4663 | 23.4899 | 3500 | 2.0763 | 0.9551 |
 
 
 
 
 
 
 
64
 
65
 
66
  ### Framework versions
67
 
68
+ - Transformers 4.57.3
69
+ - Pytorch 2.6.0+cu124
70
  - Datasets 3.6.0
71
  - Tokenizers 0.22.1
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "activation_dropout": 0.05,
3
- "adapter_attn_dim": 16,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
6
  "add_adapter": false,
@@ -8,10 +8,10 @@
8
  "architectures": [
9
  "Wav2Vec2ForCTC"
10
  ],
11
- "attention_dropout": 0.0,
12
  "bos_token_id": 1,
13
  "classifier_proj_size": 256,
14
- "codevector_dim": 1024,
15
  "contrastive_logits_temperature": 0.1,
16
  "conv_bias": true,
17
  "conv_dim": [
@@ -50,22 +50,23 @@
50
  "feat_extract_activation": "gelu",
51
  "feat_extract_dropout": 0.0,
52
  "feat_extract_norm": "layer",
53
- "feat_proj_dropout": 0.0,
54
  "feat_quantizer_dropout": 0.0,
55
- "final_dropout": 0.05,
 
56
  "hidden_act": "gelu",
57
- "hidden_dropout": 0.0,
58
- "hidden_size": 1280,
59
  "initializer_range": 0.02,
60
- "intermediate_size": 5120,
61
  "layer_norm_eps": 1e-05,
62
- "layerdrop": 0.0,
63
  "mask_feature_length": 10,
64
  "mask_feature_min_masks": 0,
65
  "mask_feature_prob": 0.0,
66
  "mask_time_length": 10,
67
  "mask_time_min_masks": 2,
68
- "mask_time_prob": 0.05,
69
  "model_type": "wav2vec2",
70
  "num_adapter_layers": 3,
71
  "num_attention_heads": 16,
@@ -74,11 +75,11 @@
74
  "num_conv_pos_embedding_groups": 16,
75
  "num_conv_pos_embeddings": 128,
76
  "num_feat_extract_layers": 7,
77
- "num_hidden_layers": 48,
78
  "num_negatives": 100,
79
- "output_hidden_size": 1280,
80
- "pad_token_id": 73,
81
- "proj_codevector_dim": 1024,
82
  "tdnn_dilation": [
83
  1,
84
  2,
@@ -100,9 +101,8 @@
100
  1,
101
  1
102
  ],
103
- "transformers_version": "5.0.0.dev0",
104
- "use_cache": false,
105
  "use_weighted_layer_sum": false,
106
- "vocab_size": 76,
107
  "xvector_output_dim": 512
108
  }
 
1
  {
2
+ "activation_dropout": 0.0,
3
+ "adapter_attn_dim": null,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
6
  "add_adapter": false,
 
8
  "architectures": [
9
  "Wav2Vec2ForCTC"
10
  ],
11
+ "attention_dropout": 0.1,
12
  "bos_token_id": 1,
13
  "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
  "contrastive_logits_temperature": 0.1,
16
  "conv_bias": true,
17
  "conv_dim": [
 
50
  "feat_extract_activation": "gelu",
51
  "feat_extract_dropout": 0.0,
52
  "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.1,
54
  "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "gradient_checkpointing": false,
57
  "hidden_act": "gelu",
58
+ "hidden_dropout": 0.1,
59
+ "hidden_size": 1024,
60
  "initializer_range": 0.02,
61
+ "intermediate_size": 4096,
62
  "layer_norm_eps": 1e-05,
63
+ "layerdrop": 0.1,
64
  "mask_feature_length": 10,
65
  "mask_feature_min_masks": 0,
66
  "mask_feature_prob": 0.0,
67
  "mask_time_length": 10,
68
  "mask_time_min_masks": 2,
69
+ "mask_time_prob": 0.075,
70
  "model_type": "wav2vec2",
71
  "num_adapter_layers": 3,
72
  "num_attention_heads": 16,
 
75
  "num_conv_pos_embedding_groups": 16,
76
  "num_conv_pos_embeddings": 128,
77
  "num_feat_extract_layers": 7,
78
+ "num_hidden_layers": 24,
79
  "num_negatives": 100,
80
+ "output_hidden_size": 1024,
81
+ "pad_token_id": 0,
82
+ "proj_codevector_dim": 768,
83
  "tdnn_dilation": [
84
  1,
85
  2,
 
101
  1,
102
  1
103
  ],
104
+ "transformers_version": "4.57.3",
 
105
  "use_weighted_layer_sum": false,
106
+ "vocab_size": 101,
107
  "xvector_output_dim": 512
108
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c24975998840862e3f61a01f98ebc32afe3591229979b25294fa9c5c33e1bdb
3
- size 3859121448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7b961a2ff48bf57947ed7c6742a6be0934d725f9b416ea5855291942677f157
3
+ size 1262221580
preprocessor_config.json CHANGED
@@ -5,6 +5,6 @@
5
  "padding_side": "right",
6
  "padding_value": 0.0,
7
  "processor_class": "Wav2Vec2Processor",
8
- "return_attention_mask": true,
9
  "sampling_rate": 16000
10
  }
 
5
  "padding_side": "right",
6
  "padding_value": 0.0,
7
  "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": false,
9
  "sampling_rate": 16000
10
  }
runs/Dec21_07-13-01_Hong/events.out.tfevents.1766275982.Hong.32364.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d4fc86d6631e92e0687defdb2a9d1d6705b366a5679628e06e4e994731759e
3
+ size 16807
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4dd97ab74010796555cc2b4db00f8eb2bdacb0bad31c58579c0fa23f46bd7c3
3
- size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27ec27fc626e80c861fd309a025d0fc91a401021154294796cf7d74cfd5a6fd4
3
+ size 5432