End of epoch 1 | AVG Contr_Loss: 5846.9614 | AVG Diver_Loss: 440.8876 | PPL: 454.6
Browse files- config.json +5 -5
- model.safetensors +2 -2
config.json
CHANGED
|
@@ -13,7 +13,7 @@
|
|
| 13 |
"classifier_proj_size": 256,
|
| 14 |
"codebook_negatives": 0,
|
| 15 |
"codevector_dim": 768,
|
| 16 |
-
"codevector_entropy_weight": 0
|
| 17 |
"contrastive_logits_temperature": 0.1,
|
| 18 |
"conv_bias": true,
|
| 19 |
"conv_dim": [
|
|
@@ -47,7 +47,7 @@
|
|
| 47 |
"ctc_zero_infinity": false,
|
| 48 |
"diversity_loss_weight": 0.1,
|
| 49 |
"do_stable_layer_norm": true,
|
| 50 |
-
"dtype": "
|
| 51 |
"eos_token_id": 2,
|
| 52 |
"feat_extract_activation": "gelu",
|
| 53 |
"feat_extract_dropout": 0.0,
|
|
@@ -56,7 +56,7 @@
|
|
| 56 |
"feat_quantizer_dropout": 0.0,
|
| 57 |
"final_dropout": 0.0,
|
| 58 |
"gradient_checkpointing": false,
|
| 59 |
-
"gumbel_temperature": 1.
|
| 60 |
"hidden_act": "gelu",
|
| 61 |
"hidden_dropout": 0.1,
|
| 62 |
"hidden_size": 1024,
|
|
@@ -73,7 +73,7 @@
|
|
| 73 |
"mask_feature_length": 10,
|
| 74 |
"mask_feature_min_masks": 0,
|
| 75 |
"mask_feature_prob": 0.0,
|
| 76 |
-
"mask_time_length":
|
| 77 |
"mask_time_min_masks": 2,
|
| 78 |
"mask_time_min_space": 1,
|
| 79 |
"mask_time_other": 0.0,
|
|
@@ -88,7 +88,7 @@
|
|
| 88 |
"num_conv_pos_embeddings": 128,
|
| 89 |
"num_feat_extract_layers": 7,
|
| 90 |
"num_hidden_layers": 24,
|
| 91 |
-
"num_negatives":
|
| 92 |
"output_hidden_size": 1024,
|
| 93 |
"pad_token_id": 0,
|
| 94 |
"proj_codevector_dim": 768,
|
|
|
|
| 13 |
"classifier_proj_size": 256,
|
| 14 |
"codebook_negatives": 0,
|
| 15 |
"codevector_dim": 768,
|
| 16 |
+
"codevector_entropy_weight": 2.0,
|
| 17 |
"contrastive_logits_temperature": 0.1,
|
| 18 |
"conv_bias": true,
|
| 19 |
"conv_dim": [
|
|
|
|
| 47 |
"ctc_zero_infinity": false,
|
| 48 |
"diversity_loss_weight": 0.1,
|
| 49 |
"do_stable_layer_norm": true,
|
| 50 |
+
"dtype": "float32",
|
| 51 |
"eos_token_id": 2,
|
| 52 |
"feat_extract_activation": "gelu",
|
| 53 |
"feat_extract_dropout": 0.0,
|
|
|
|
| 56 |
"feat_quantizer_dropout": 0.0,
|
| 57 |
"final_dropout": 0.0,
|
| 58 |
"gradient_checkpointing": false,
|
| 59 |
+
"gumbel_temperature": 1.999890002749958,
|
| 60 |
"hidden_act": "gelu",
|
| 61 |
"hidden_dropout": 0.1,
|
| 62 |
"hidden_size": 1024,
|
|
|
|
| 73 |
"mask_feature_length": 10,
|
| 74 |
"mask_feature_min_masks": 0,
|
| 75 |
"mask_feature_prob": 0.0,
|
| 76 |
+
"mask_time_length": 8,
|
| 77 |
"mask_time_min_masks": 2,
|
| 78 |
"mask_time_min_space": 1,
|
| 79 |
"mask_time_other": 0.0,
|
|
|
|
| 88 |
"num_conv_pos_embeddings": 128,
|
| 89 |
"num_feat_extract_layers": 7,
|
| 90 |
"num_hidden_layers": 24,
|
| 91 |
+
"num_negatives": 50,
|
| 92 |
"output_hidden_size": 1024,
|
| 93 |
"pad_token_id": 0,
|
| 94 |
"proj_codevector_dim": 768,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:478c22da2e42062ae01980e30e72f906d7f410dba9c9a9226096077acb7e5078
|
| 3 |
+
size 1269615400
|