Training in progress, step 150
Browse files
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6173655480
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6ca9bec437c0cd91a8eeda2217dd1c4db24cd28a139cc75502b23adc9965093
|
| 3 |
size 6173655480
|
run_speech_recognition_seq2seq_streaming.py
CHANGED
|
@@ -50,6 +50,7 @@ from transformers.trainer_pt_utils import IterableDatasetShard
|
|
| 50 |
from transformers.trainer_utils import get_last_checkpoint, is_main_process
|
| 51 |
from transformers.utils import check_min_version, send_example_telemetry
|
| 52 |
from transformers.utils.versions import require_version
|
|
|
|
| 53 |
|
| 54 |
|
| 55 |
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
|
@@ -386,6 +387,26 @@ def main():
|
|
| 386 |
f"{', '.join(raw_datasets_features)}."
|
| 387 |
)
|
| 388 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
# 5. Load pretrained model, tokenizer, and feature extractor
|
| 390 |
#
|
| 391 |
# Distributed training:
|
|
|
|
| 50 |
from transformers.trainer_utils import get_last_checkpoint, is_main_process
|
| 51 |
from transformers.utils import check_min_version, send_example_telemetry
|
| 52 |
from transformers.utils.versions import require_version
|
| 53 |
+
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
|
| 54 |
|
| 55 |
|
| 56 |
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
|
|
|
| 387 |
f"{', '.join(raw_datasets_features)}."
|
| 388 |
)
|
| 389 |
|
| 390 |
+
# Waveform-level augmentation pipeline; each transform fires independently
# with probability p=0.2, so many examples pass through unchanged.
augment_waveform = Compose([
    AddGaussianNoise(min_amplitude=0.005, max_amplitude=0.015, p=0.2),
    # leave_length_unchanged=False lets the stretched clip keep its new
    # duration (the feature extractor re-pads downstream).
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.2, leave_length_unchanged=False),
    PitchShift(min_semitones=-4, max_semitones=4, p=0.2),
])


def augment_dataset(batch):
    """Apply waveform augmentation to a single dataset example in place.

    Args:
        batch: one example dict with an ``"audio"`` entry holding at least
            an ``"array"`` waveform (and, per HF ``datasets`` convention,
            a ``"sampling_rate"`` — TODO confirm against the loader).

    Returns:
        The same example dict with ``batch["audio"]["array"]`` replaced by
        its augmented version.
    """
    audio = batch["audio"]["array"]
    # Use the example's true sampling rate rather than assuming 16 kHz;
    # PitchShift/TimeStretch parameters are interpreted relative to this
    # rate. Fall back to 16000 (the original hard-coded value) when the
    # example does not carry a rate.
    sample_rate = batch["audio"].get("sampling_rate", 16000)
    batch["audio"]["array"] = augment_waveform(samples=audio, sample_rate=sample_rate)
    return batch


# Augment only the training split — evaluation audio must stay clean so
# metrics remain comparable across runs.
raw_datasets["train"] = raw_datasets["train"].map(augment_dataset)
|
| 409 |
+
|
| 410 |
# 5. Load pretrained model, tokenizer, and feature extractor
|
| 411 |
#
|
| 412 |
# Distributed training:
|
runs/Dec20_13-35-03_0393d32b0779/events.out.tfevents.1671536148.0393d32b0779.2738.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08bcbacbcb00c4f7158b2e49a194b441dc14d855720e61e1f56bcb64ed52b1e5
|
| 3 |
+
size 6132
|
train.sh
CHANGED
|
@@ -16,7 +16,7 @@ python -m torch.distributed.launch --nproc_per_node 2 run_speech_recognition_seq
|
|
| 16 |
--per_device_eval_batch_size="16" \
|
| 17 |
--logging_steps="25" \
|
| 18 |
--learning_rate="1e-6" \
|
| 19 |
-
--warmup_steps="
|
| 20 |
--evaluation_strategy="steps" \
|
| 21 |
--eval_steps="50" \
|
| 22 |
--save_strategy="steps" \
|
|
|
|
| 16 |
--per_device_eval_batch_size="16" \
|
| 17 |
--logging_steps="25" \
|
| 18 |
--learning_rate="1e-6" \
|
| 19 |
+
--warmup_steps="40" \
|
| 20 |
--evaluation_strategy="steps" \
|
| 21 |
--eval_steps="50" \
|
| 22 |
--save_strategy="steps" \
|