Josh Cole
committed on
Commit
·
b493b18
1
Parent(s):
fea66a8
update
Browse files
- Generate.ipynb +22 -32
- config.json +7 -15
- pytorch_model.bin +1 -1
- training_args.bin +1 -1
Generate.ipynb
CHANGED
|
@@ -242,7 +242,7 @@
|
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"cell_type": "code",
|
| 245 |
-
"execution_count":
|
| 246 |
"id": "1025ffdf-cb83-4895-89ab-a98bc3fab642",
|
| 247 |
"metadata": {},
|
| 248 |
"outputs": [],
|
|
@@ -253,7 +253,7 @@
|
|
| 253 |
},
|
| 254 |
{
|
| 255 |
"cell_type": "code",
|
| 256 |
-
"execution_count":
|
| 257 |
"id": "71351cf4-6d00-40ae-89cc-cedb87073625",
|
| 258 |
"metadata": {},
|
| 259 |
"outputs": [
|
|
@@ -261,14 +261,13 @@
|
|
| 261 |
"name": "stderr",
|
| 262 |
"output_type": "stream",
|
| 263 |
"text": [
|
| 264 |
-
"loading configuration file https://huggingface.co/facebook/wav2vec2-base/resolve/main/config.json from cache at /home/sharpcoder/.cache/huggingface/transformers/
|
| 265 |
-
"/home/sharpcoder/.local/lib/python3.10/site-packages/transformers/configuration_utils.py:336: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.\n",
|
| 266 |
-
" warnings.warn(\n",
|
| 267 |
"Model config Wav2Vec2Config {\n",
|
| 268 |
-
" \"
|
|
|
|
| 269 |
" \"apply_spec_augment\": true,\n",
|
| 270 |
" \"architectures\": [\n",
|
| 271 |
-
" \"
|
| 272 |
" ],\n",
|
| 273 |
" \"attention_dropout\": 0.1,\n",
|
| 274 |
" \"bos_token_id\": 1,\n",
|
|
@@ -309,34 +308,25 @@
|
|
| 309 |
" \"do_stable_layer_norm\": false,\n",
|
| 310 |
" \"eos_token_id\": 2,\n",
|
| 311 |
" \"feat_extract_activation\": \"gelu\",\n",
|
|
|
|
| 312 |
" \"feat_extract_norm\": \"group\",\n",
|
| 313 |
" \"feat_proj_dropout\": 0.1,\n",
|
| 314 |
" \"feat_quantizer_dropout\": 0.0,\n",
|
| 315 |
-
" \"final_dropout\": 0.
|
| 316 |
-
" \"
|
| 317 |
-
" \"gradient_checkpointing\": true,\n",
|
| 318 |
" \"hidden_act\": \"gelu\",\n",
|
| 319 |
" \"hidden_dropout\": 0.1,\n",
|
|
|
|
| 320 |
" \"hidden_size\": 768,\n",
|
| 321 |
" \"initializer_range\": 0.02,\n",
|
| 322 |
" \"intermediate_size\": 3072,\n",
|
| 323 |
" \"layer_norm_eps\": 1e-05,\n",
|
| 324 |
-
" \"layerdrop\": 0.
|
| 325 |
-
" \"mask_channel_length\": 10,\n",
|
| 326 |
-
" \"mask_channel_min_space\": 1,\n",
|
| 327 |
-
" \"mask_channel_other\": 0.0,\n",
|
| 328 |
-
" \"mask_channel_prob\": 0.0,\n",
|
| 329 |
-
" \"mask_channel_selection\": \"static\",\n",
|
| 330 |
" \"mask_feature_length\": 10,\n",
|
| 331 |
" \"mask_feature_prob\": 0.0,\n",
|
| 332 |
" \"mask_time_length\": 10,\n",
|
| 333 |
-
" \"mask_time_min_space\": 1,\n",
|
| 334 |
-
" \"mask_time_other\": 0.0,\n",
|
| 335 |
" \"mask_time_prob\": 0.05,\n",
|
| 336 |
-
" \"mask_time_selection\": \"static\",\n",
|
| 337 |
" \"model_type\": \"wav2vec2\",\n",
|
| 338 |
-
" \"no_mask_channel_overlap\": false,\n",
|
| 339 |
-
" \"no_mask_time_overlap\": false,\n",
|
| 340 |
" \"num_attention_heads\": 12,\n",
|
| 341 |
" \"num_codevector_groups\": 2,\n",
|
| 342 |
" \"num_codevectors_per_group\": 320,\n",
|
|
@@ -352,11 +342,10 @@
|
|
| 352 |
" \"vocab_size\": 32\n",
|
| 353 |
"}\n",
|
| 354 |
"\n",
|
| 355 |
-
"loading weights file https://huggingface.co/facebook/wav2vec2-base/resolve/main/pytorch_model.bin from cache at /home/sharpcoder/.cache/huggingface/transformers/
|
| 356 |
-
"
|
| 357 |
-
"
|
| 358 |
-
"
|
| 359 |
-
"Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['lm_head.weight', 'lm_head.bias']\n",
|
| 360 |
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 361 |
]
|
| 362 |
}
|
|
@@ -365,7 +354,8 @@
|
|
| 365 |
"from transformers import Wav2Vec2ForCTC\n",
|
| 366 |
"\n",
|
| 367 |
"model = Wav2Vec2ForCTC.from_pretrained(\n",
|
| 368 |
-
"
|
|
|
|
| 369 |
" ctc_loss_reduction=\"mean\", \n",
|
| 370 |
" pad_token_id=processor.tokenizer.pad_token_id,\n",
|
| 371 |
")"
|
|
@@ -373,7 +363,7 @@
|
|
| 373 |
},
|
| 374 |
{
|
| 375 |
"cell_type": "code",
|
| 376 |
-
"execution_count":
|
| 377 |
"id": "208eac7d-9fdd-4c82-b46f-25c1a1f246ee",
|
| 378 |
"metadata": {},
|
| 379 |
"outputs": [
|
|
@@ -395,7 +385,7 @@
|
|
| 395 |
" group_by_length=True,\n",
|
| 396 |
" per_device_train_batch_size=8,\n",
|
| 397 |
" evaluation_strategy=\"steps\",\n",
|
| 398 |
-
" num_train_epochs=
|
| 399 |
" fp16=False,\n",
|
| 400 |
" gradient_checkpointing=True,\n",
|
| 401 |
" save_steps=500,\n",
|
|
@@ -420,7 +410,7 @@
|
|
| 420 |
},
|
| 421 |
{
|
| 422 |
"cell_type": "code",
|
| 423 |
-
"execution_count":
|
| 424 |
"id": "d58f6b8c-441c-4fa9-a308-e687948875e1",
|
| 425 |
"metadata": {},
|
| 426 |
"outputs": [
|
|
@@ -480,10 +470,10 @@
|
|
| 480 |
{
|
| 481 |
"data": {
|
| 482 |
"text/plain": [
|
| 483 |
-
"TrainOutput(global_step=3, training_loss=
|
| 484 |
]
|
| 485 |
},
|
| 486 |
-
"execution_count":
|
| 487 |
"metadata": {},
|
| 488 |
"output_type": "execute_result"
|
| 489 |
}
|
|
|
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"cell_type": "code",
|
| 245 |
+
"execution_count": 49,
|
| 246 |
"id": "1025ffdf-cb83-4895-89ab-a98bc3fab642",
|
| 247 |
"metadata": {},
|
| 248 |
"outputs": [],
|
|
|
|
| 253 |
},
|
| 254 |
{
|
| 255 |
"cell_type": "code",
|
| 256 |
+
"execution_count": 50,
|
| 257 |
"id": "71351cf4-6d00-40ae-89cc-cedb87073625",
|
| 258 |
"metadata": {},
|
| 259 |
"outputs": [
|
|
|
|
| 261 |
"name": "stderr",
|
| 262 |
"output_type": "stream",
|
| 263 |
"text": [
|
| 264 |
+
"loading configuration file https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/config.json from cache at /home/sharpcoder/.cache/huggingface/transformers/cbb3014bb9f03ead9b94f4a791ff8e777465307670e85079d35e28cbc5d88727.0e2d739358c9b58747bd19db5f9f4320dacabbeb1e6282f5cc1069c5c55a82d2\n",
|
|
|
|
|
|
|
| 265 |
"Model config Wav2Vec2Config {\n",
|
| 266 |
+
" \"_name_or_path\": \"facebook/wav2vec2-base-960h\",\n",
|
| 267 |
+
" \"activation_dropout\": 0.1,\n",
|
| 268 |
" \"apply_spec_augment\": true,\n",
|
| 269 |
" \"architectures\": [\n",
|
| 270 |
+
" \"Wav2Vec2ForCTC\"\n",
|
| 271 |
" ],\n",
|
| 272 |
" \"attention_dropout\": 0.1,\n",
|
| 273 |
" \"bos_token_id\": 1,\n",
|
|
|
|
| 308 |
" \"do_stable_layer_norm\": false,\n",
|
| 309 |
" \"eos_token_id\": 2,\n",
|
| 310 |
" \"feat_extract_activation\": \"gelu\",\n",
|
| 311 |
+
" \"feat_extract_dropout\": 0.0,\n",
|
| 312 |
" \"feat_extract_norm\": \"group\",\n",
|
| 313 |
" \"feat_proj_dropout\": 0.1,\n",
|
| 314 |
" \"feat_quantizer_dropout\": 0.0,\n",
|
| 315 |
+
" \"final_dropout\": 0.1,\n",
|
| 316 |
+
" \"gradient_checkpointing\": false,\n",
|
|
|
|
| 317 |
" \"hidden_act\": \"gelu\",\n",
|
| 318 |
" \"hidden_dropout\": 0.1,\n",
|
| 319 |
+
" \"hidden_dropout_prob\": 0.1,\n",
|
| 320 |
" \"hidden_size\": 768,\n",
|
| 321 |
" \"initializer_range\": 0.02,\n",
|
| 322 |
" \"intermediate_size\": 3072,\n",
|
| 323 |
" \"layer_norm_eps\": 1e-05,\n",
|
| 324 |
+
" \"layerdrop\": 0.1,\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
" \"mask_feature_length\": 10,\n",
|
| 326 |
" \"mask_feature_prob\": 0.0,\n",
|
| 327 |
" \"mask_time_length\": 10,\n",
|
|
|
|
|
|
|
| 328 |
" \"mask_time_prob\": 0.05,\n",
|
|
|
|
| 329 |
" \"model_type\": \"wav2vec2\",\n",
|
|
|
|
|
|
|
| 330 |
" \"num_attention_heads\": 12,\n",
|
| 331 |
" \"num_codevector_groups\": 2,\n",
|
| 332 |
" \"num_codevectors_per_group\": 320,\n",
|
|
|
|
| 342 |
" \"vocab_size\": 32\n",
|
| 343 |
"}\n",
|
| 344 |
"\n",
|
| 345 |
+
"loading weights file https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/pytorch_model.bin from cache at /home/sharpcoder/.cache/huggingface/transformers/4cb133d3cf3e58e8a4e088b1fc826611a3bcf3d98b20a0bb49ce8cd5362411b7.beeaccfa4baf44ba6123c23938d8a17f48344361a5e7041782e537dfd78a2037\n",
|
| 346 |
+
"All model checkpoint weights were used when initializing Wav2Vec2ForCTC.\n",
|
| 347 |
+
"\n",
|
| 348 |
+
"Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']\n",
|
|
|
|
| 349 |
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 350 |
]
|
| 351 |
}
|
|
|
|
| 354 |
"from transformers import Wav2Vec2ForCTC\n",
|
| 355 |
"\n",
|
| 356 |
"model = Wav2Vec2ForCTC.from_pretrained(\n",
|
| 357 |
+
" #\"facebook/wav2vec2-base\",\n",
|
| 358 |
+
" \"facebook/wav2vec2-base-960h\",\n",
|
| 359 |
" ctc_loss_reduction=\"mean\", \n",
|
| 360 |
" pad_token_id=processor.tokenizer.pad_token_id,\n",
|
| 361 |
")"
|
|
|
|
| 363 |
},
|
| 364 |
{
|
| 365 |
"cell_type": "code",
|
| 366 |
+
"execution_count": 51,
|
| 367 |
"id": "208eac7d-9fdd-4c82-b46f-25c1a1f246ee",
|
| 368 |
"metadata": {},
|
| 369 |
"outputs": [
|
|
|
|
| 385 |
" group_by_length=True,\n",
|
| 386 |
" per_device_train_batch_size=8,\n",
|
| 387 |
" evaluation_strategy=\"steps\",\n",
|
| 388 |
+
" num_train_epochs=30,\n",
|
| 389 |
" fp16=False,\n",
|
| 390 |
" gradient_checkpointing=True,\n",
|
| 391 |
" save_steps=500,\n",
|
|
|
|
| 410 |
},
|
| 411 |
{
|
| 412 |
"cell_type": "code",
|
| 413 |
+
"execution_count": 52,
|
| 414 |
"id": "d58f6b8c-441c-4fa9-a308-e687948875e1",
|
| 415 |
"metadata": {},
|
| 416 |
"outputs": [
|
|
|
|
| 470 |
{
|
| 471 |
"data": {
|
| 472 |
"text/plain": [
|
| 473 |
+
"TrainOutput(global_step=3, training_loss=15.702210744222006, metrics={'train_runtime': 3.157, 'train_samples_per_second': 0.95, 'train_steps_per_second': 0.95, 'total_flos': 94374986431680.0, 'train_loss': 15.702210744222006, 'epoch': 3.0})"
|
| 474 |
]
|
| 475 |
},
|
| 476 |
+
"execution_count": 52,
|
| 477 |
"metadata": {},
|
| 478 |
"output_type": "execute_result"
|
| 479 |
}
|
config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "facebook/wav2vec2-base",
|
| 3 |
-
"activation_dropout": 0.
|
| 4 |
"apply_spec_augment": true,
|
| 5 |
"architectures": [
|
| 6 |
"Wav2Vec2ForCTC"
|
|
@@ -44,33 +44,25 @@
|
|
| 44 |
"do_stable_layer_norm": false,
|
| 45 |
"eos_token_id": 2,
|
| 46 |
"feat_extract_activation": "gelu",
|
|
|
|
| 47 |
"feat_extract_norm": "group",
|
| 48 |
"feat_proj_dropout": 0.1,
|
| 49 |
"feat_quantizer_dropout": 0.0,
|
| 50 |
-
"final_dropout": 0.
|
| 51 |
-
"
|
| 52 |
"hidden_act": "gelu",
|
| 53 |
"hidden_dropout": 0.1,
|
|
|
|
| 54 |
"hidden_size": 768,
|
| 55 |
"initializer_range": 0.02,
|
| 56 |
"intermediate_size": 3072,
|
| 57 |
"layer_norm_eps": 1e-05,
|
| 58 |
-
"layerdrop": 0.
|
| 59 |
-
"mask_channel_length": 10,
|
| 60 |
-
"mask_channel_min_space": 1,
|
| 61 |
-
"mask_channel_other": 0.0,
|
| 62 |
-
"mask_channel_prob": 0.0,
|
| 63 |
-
"mask_channel_selection": "static",
|
| 64 |
"mask_feature_length": 10,
|
| 65 |
"mask_feature_prob": 0.0,
|
| 66 |
"mask_time_length": 10,
|
| 67 |
-
"mask_time_min_space": 1,
|
| 68 |
-
"mask_time_other": 0.0,
|
| 69 |
"mask_time_prob": 0.05,
|
| 70 |
-
"mask_time_selection": "static",
|
| 71 |
"model_type": "wav2vec2",
|
| 72 |
-
"no_mask_channel_overlap": false,
|
| 73 |
-
"no_mask_time_overlap": false,
|
| 74 |
"num_attention_heads": 12,
|
| 75 |
"num_codevector_groups": 2,
|
| 76 |
"num_codevectors_per_group": 320,
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "facebook/wav2vec2-base-960h",
|
| 3 |
+
"activation_dropout": 0.1,
|
| 4 |
"apply_spec_augment": true,
|
| 5 |
"architectures": [
|
| 6 |
"Wav2Vec2ForCTC"
|
|
|
|
| 44 |
"do_stable_layer_norm": false,
|
| 45 |
"eos_token_id": 2,
|
| 46 |
"feat_extract_activation": "gelu",
|
| 47 |
+
"feat_extract_dropout": 0.0,
|
| 48 |
"feat_extract_norm": "group",
|
| 49 |
"feat_proj_dropout": 0.1,
|
| 50 |
"feat_quantizer_dropout": 0.0,
|
| 51 |
+
"final_dropout": 0.1,
|
| 52 |
+
"gradient_checkpointing": false,
|
| 53 |
"hidden_act": "gelu",
|
| 54 |
"hidden_dropout": 0.1,
|
| 55 |
+
"hidden_dropout_prob": 0.1,
|
| 56 |
"hidden_size": 768,
|
| 57 |
"initializer_range": 0.02,
|
| 58 |
"intermediate_size": 3072,
|
| 59 |
"layer_norm_eps": 1e-05,
|
| 60 |
+
"layerdrop": 0.1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
"mask_feature_length": 10,
|
| 62 |
"mask_feature_prob": 0.0,
|
| 63 |
"mask_time_length": 10,
|
|
|
|
|
|
|
| 64 |
"mask_time_prob": 0.05,
|
|
|
|
| 65 |
"model_type": "wav2vec2",
|
|
|
|
|
|
|
| 66 |
"num_attention_heads": 12,
|
| 67 |
"num_codevector_groups": 2,
|
| 68 |
"num_codevectors_per_group": 320,
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 377667031
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d3f3abcf77f71881019078ae17cf773e46b424e4176401072a817530aabafac
|
| 3 |
size 377667031
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2735
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed19b832c7db582771504df1e4a7dc89ac95ce233c3914ed7c2c37ff4ea55f88
|
| 3 |
size 2735
|