update

Files changed (4) hide show

Generate.ipynb +22 -32
config.json +7 -15
pytorch_model.bin +1 -1
training_args.bin +1 -1

Generate.ipynb CHANGED Viewed

@@ -242,7 +242,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
    "id": "1025ffdf-cb83-4895-89ab-a98bc3fab642",
    "metadata": {},
    "outputs": [],
@@ -253,7 +253,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
    "id": "71351cf4-6d00-40ae-89cc-cedb87073625",
    "metadata": {},
    "outputs": [
@@ -261,14 +261,13 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "loading configuration file https://huggingface.co/facebook/wav2vec2-base/resolve/main/config.json from cache at /home/sharpcoder/.cache/huggingface/transformers/c7746642f045322fd01afa31271dd490e677ea11999e68660a92619ec7c892b4.ce1f96bfaf3d7475cb8187b9668c7f19437ade45fb9ceb78d2b06a2cec198015\n",
-      "/home/sharpcoder/.local/lib/python3.10/site-packages/transformers/configuration_utils.py:336: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.\n",
-      "  warnings.warn(\n",
       "Model config Wav2Vec2Config {\n",
-      "  \"activation_dropout\": 0.0,\n",
       "  \"apply_spec_augment\": true,\n",
       "  \"architectures\": [\n",
-      "    \"Wav2Vec2ForPreTraining\"\n",
       "  ],\n",
       "  \"attention_dropout\": 0.1,\n",
       "  \"bos_token_id\": 1,\n",
@@ -309,34 +308,25 @@
       "  \"do_stable_layer_norm\": false,\n",
       "  \"eos_token_id\": 2,\n",
       "  \"feat_extract_activation\": \"gelu\",\n",
       "  \"feat_extract_norm\": \"group\",\n",
       "  \"feat_proj_dropout\": 0.1,\n",
       "  \"feat_quantizer_dropout\": 0.0,\n",
-      "  \"final_dropout\": 0.0,\n",
-      "  \"freeze_feat_extract_train\": true,\n",
-      "  \"gradient_checkpointing\": true,\n",
       "  \"hidden_act\": \"gelu\",\n",
       "  \"hidden_dropout\": 0.1,\n",
       "  \"hidden_size\": 768,\n",
       "  \"initializer_range\": 0.02,\n",
       "  \"intermediate_size\": 3072,\n",
       "  \"layer_norm_eps\": 1e-05,\n",
-      "  \"layerdrop\": 0.0,\n",
-      "  \"mask_channel_length\": 10,\n",
-      "  \"mask_channel_min_space\": 1,\n",
-      "  \"mask_channel_other\": 0.0,\n",
-      "  \"mask_channel_prob\": 0.0,\n",
-      "  \"mask_channel_selection\": \"static\",\n",
       "  \"mask_feature_length\": 10,\n",
       "  \"mask_feature_prob\": 0.0,\n",
       "  \"mask_time_length\": 10,\n",
-      "  \"mask_time_min_space\": 1,\n",
-      "  \"mask_time_other\": 0.0,\n",
       "  \"mask_time_prob\": 0.05,\n",
-      "  \"mask_time_selection\": \"static\",\n",
       "  \"model_type\": \"wav2vec2\",\n",
-      "  \"no_mask_channel_overlap\": false,\n",
-      "  \"no_mask_time_overlap\": false,\n",
       "  \"num_attention_heads\": 12,\n",
       "  \"num_codevector_groups\": 2,\n",
       "  \"num_codevectors_per_group\": 320,\n",
@@ -352,11 +342,10 @@
       "  \"vocab_size\": 32\n",
       "}\n",
       "\n",
-      "loading weights file https://huggingface.co/facebook/wav2vec2-base/resolve/main/pytorch_model.bin from cache at /home/sharpcoder/.cache/huggingface/transformers/ef45231897ce572a660ebc5a63d3702f1a6041c4c5fb78cbec330708531939b3.fcae05302a685f7904c551c8ea571e8bc2a2c4a1777ea81ad66e47f7883a650a\n",
-      "Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2ForCTC: ['project_hid.bias', 'quantizer.weight_proj.bias', 'project_q.weight', 'project_hid.weight', 'quantizer.weight_proj.weight', 'quantizer.codevectors', 'project_q.bias']\n",
-      "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-      "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-      "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['lm_head.weight', 'lm_head.bias']\n",
       "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
      ]
     }
@@ -365,7 +354,8 @@
     "from transformers import Wav2Vec2ForCTC\n",
     "\n",
     "model = Wav2Vec2ForCTC.from_pretrained(\n",
-    "    \"facebook/wav2vec2-base\",\n",
     "    ctc_loss_reduction=\"mean\", \n",
     "    pad_token_id=processor.tokenizer.pad_token_id,\n",
     ")"
@@ -373,7 +363,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
    "id": "208eac7d-9fdd-4c82-b46f-25c1a1f246ee",
    "metadata": {},
    "outputs": [
@@ -395,7 +385,7 @@
     "  group_by_length=True,\n",
     "  per_device_train_batch_size=8,\n",
     "  evaluation_strategy=\"steps\",\n",
-    "  num_train_epochs=3,\n",
     "  fp16=False,\n",
     "  gradient_checkpointing=True,\n",
     "  save_steps=500,\n",
@@ -420,7 +410,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
    "id": "d58f6b8c-441c-4fa9-a308-e687948875e1",
    "metadata": {},
    "outputs": [
@@ -480,10 +470,10 @@
     {
      "data": {
       "text/plain": [
-       "TrainOutput(global_step=3, training_loss=10.471563975016275, metrics={'train_runtime': 3.8966, 'train_samples_per_second': 0.77, 'train_steps_per_second': 0.77, 'total_flos': 94374986431680.0, 'train_loss': 10.471563975016275, 'epoch': 3.0})"
       ]
      },
-     "execution_count": 46,
      "metadata": {},
      "output_type": "execute_result"
     }

   },
   {
    "cell_type": "code",
+   "execution_count": 49,
    "id": "1025ffdf-cb83-4895-89ab-a98bc3fab642",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 50,
    "id": "71351cf4-6d00-40ae-89cc-cedb87073625",
    "metadata": {},
    "outputs": [
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "loading configuration file https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/config.json from cache at /home/sharpcoder/.cache/huggingface/transformers/cbb3014bb9f03ead9b94f4a791ff8e777465307670e85079d35e28cbc5d88727.0e2d739358c9b58747bd19db5f9f4320dacabbeb1e6282f5cc1069c5c55a82d2\n",
       "Model config Wav2Vec2Config {\n",
+      "  \"_name_or_path\": \"facebook/wav2vec2-base-960h\",\n",
+      "  \"activation_dropout\": 0.1,\n",
       "  \"apply_spec_augment\": true,\n",
       "  \"architectures\": [\n",
+      "    \"Wav2Vec2ForCTC\"\n",
       "  ],\n",
       "  \"attention_dropout\": 0.1,\n",
       "  \"bos_token_id\": 1,\n",
       "  \"do_stable_layer_norm\": false,\n",
       "  \"eos_token_id\": 2,\n",
       "  \"feat_extract_activation\": \"gelu\",\n",
+      "  \"feat_extract_dropout\": 0.0,\n",
       "  \"feat_extract_norm\": \"group\",\n",
       "  \"feat_proj_dropout\": 0.1,\n",
       "  \"feat_quantizer_dropout\": 0.0,\n",
+      "  \"final_dropout\": 0.1,\n",
+      "  \"gradient_checkpointing\": false,\n",
       "  \"hidden_act\": \"gelu\",\n",
       "  \"hidden_dropout\": 0.1,\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
       "  \"hidden_size\": 768,\n",
       "  \"initializer_range\": 0.02,\n",
       "  \"intermediate_size\": 3072,\n",
       "  \"layer_norm_eps\": 1e-05,\n",
+      "  \"layerdrop\": 0.1,\n",
       "  \"mask_feature_length\": 10,\n",
       "  \"mask_feature_prob\": 0.0,\n",
       "  \"mask_time_length\": 10,\n",
       "  \"mask_time_prob\": 0.05,\n",
       "  \"model_type\": \"wav2vec2\",\n",
       "  \"num_attention_heads\": 12,\n",
       "  \"num_codevector_groups\": 2,\n",
       "  \"num_codevectors_per_group\": 320,\n",
       "  \"vocab_size\": 32\n",
       "}\n",
       "\n",
+      "loading weights file https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/pytorch_model.bin from cache at /home/sharpcoder/.cache/huggingface/transformers/4cb133d3cf3e58e8a4e088b1fc826611a3bcf3d98b20a0bb49ce8cd5362411b7.beeaccfa4baf44ba6123c23938d8a17f48344361a5e7041782e537dfd78a2037\n",
+      "All model checkpoint weights were used when initializing Wav2Vec2ForCTC.\n",
+      "\n",
+      "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']\n",
       "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
      ]
     }
     "from transformers import Wav2Vec2ForCTC\n",
     "\n",
     "model = Wav2Vec2ForCTC.from_pretrained(\n",
+    "    #\"facebook/wav2vec2-base\",\n",
+    "    \"facebook/wav2vec2-base-960h\",\n",
     "    ctc_loss_reduction=\"mean\", \n",
     "    pad_token_id=processor.tokenizer.pad_token_id,\n",
     ")"
   },
   {
    "cell_type": "code",
+   "execution_count": 51,
    "id": "208eac7d-9fdd-4c82-b46f-25c1a1f246ee",
    "metadata": {},
    "outputs": [
     "  group_by_length=True,\n",
     "  per_device_train_batch_size=8,\n",
     "  evaluation_strategy=\"steps\",\n",
+    "  num_train_epochs=30,\n",
     "  fp16=False,\n",
     "  gradient_checkpointing=True,\n",
     "  save_steps=500,\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 52,
    "id": "d58f6b8c-441c-4fa9-a308-e687948875e1",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "TrainOutput(global_step=3, training_loss=15.702210744222006, metrics={'train_runtime': 3.157, 'train_samples_per_second': 0.95, 'train_steps_per_second': 0.95, 'total_flos': 94374986431680.0, 'train_loss': 15.702210744222006, 'epoch': 3.0})"
       ]
      },
+     "execution_count": 52,
      "metadata": {},
      "output_type": "execute_result"
     }

config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-  "_name_or_path": "facebook/wav2vec2-base",
-  "activation_dropout": 0.0,
   "apply_spec_augment": true,
   "architectures": [
     "Wav2Vec2ForCTC"
@@ -44,33 +44,25 @@
   "do_stable_layer_norm": false,
   "eos_token_id": 2,
   "feat_extract_activation": "gelu",
   "feat_extract_norm": "group",
   "feat_proj_dropout": 0.1,
   "feat_quantizer_dropout": 0.0,
-  "final_dropout": 0.0,
-  "freeze_feat_extract_train": true,
   "hidden_act": "gelu",
   "hidden_dropout": 0.1,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "layer_norm_eps": 1e-05,
-  "layerdrop": 0.0,
-  "mask_channel_length": 10,
-  "mask_channel_min_space": 1,
-  "mask_channel_other": 0.0,
-  "mask_channel_prob": 0.0,
-  "mask_channel_selection": "static",
   "mask_feature_length": 10,
   "mask_feature_prob": 0.0,
   "mask_time_length": 10,
-  "mask_time_min_space": 1,
-  "mask_time_other": 0.0,
   "mask_time_prob": 0.05,
-  "mask_time_selection": "static",
   "model_type": "wav2vec2",
-  "no_mask_channel_overlap": false,
-  "no_mask_time_overlap": false,
   "num_attention_heads": 12,
   "num_codevector_groups": 2,
   "num_codevectors_per_group": 320,

 {
+  "_name_or_path": "facebook/wav2vec2-base-960h",
+  "activation_dropout": 0.1,
   "apply_spec_augment": true,
   "architectures": [
     "Wav2Vec2ForCTC"
   "do_stable_layer_norm": false,
   "eos_token_id": 2,
   "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
   "feat_extract_norm": "group",
   "feat_proj_dropout": 0.1,
   "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.1,
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout": 0.1,
+  "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
   "mask_feature_length": 10,
   "mask_feature_prob": 0.0,
   "mask_time_length": 10,
   "mask_time_prob": 0.05,
   "model_type": "wav2vec2",
   "num_attention_heads": 12,
   "num_codevector_groups": 2,
   "num_codevectors_per_group": 320,

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea220cc133930f98791c7b7a1d76d68b159241b625a40a783d4e05d2c93c11d7
 size 377667031

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d3f3abcf77f71881019078ae17cf773e46b424e4176401072a817530aabafac
 size 377667031

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19c7738f5655571cd7c062b8a732e09ad439c7c98c6a054da91449f8906026bf
 size 2735

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed19b832c7db582771504df1e4a7dc89ac95ce233c3914ed7c2c37ff4ea55f88
 size 2735