{'loss': 0.1031, 'grad_norm': 0.2845826745033264, 'learning_rate': 3.2307982638233634e-05, 'epoch': 4.84}
{'loss': 0.0992, 'grad_norm': 0.26327937841415405, 'learning_rate': 2.8533685601056805e-05, 'epoch': 4.86}
{'loss': 0.0958, 'grad_norm': 0.2351849228143692, 'learning_rate': 2.475938856387998e-05, 'epoch': 4.88}
{'loss': 0.0974, 'grad_norm': 0.4339858889579773, 'learning_rate': 2.098509152670315e-05, 'epoch': 4.9}
{'eval_loss': 0.1585860401391983, 'eval_runtime': 110.8759, 'eval_samples_per_second': 161.288, 'eval_steps_per_second': 2.525, 'epoch': 4.9}
{'loss': 0.0997, 'grad_norm': 0.2502700090408325, 'learning_rate': 1.721079448952633e-05, 'epoch': 4.92}
{'loss': 0.0959, 'grad_norm': 0.23213832080364227, 'learning_rate': 1.34364974523495e-05, 'epoch': 4.94}
{'loss': 0.0959, 'grad_norm': 0.2496497929096222, 'learning_rate': 9.662200415172673e-06, 'epoch': 4.95}
{'loss': 0.0993, 'grad_norm': 0.25054439902305603, 'learning_rate': 5.887903377995848e-06, 'epoch': 4.97}
{'loss': 0.1001, 'grad_norm': 0.26840072870254517, 'learning_rate': 2.1136063408190225e-06, 'epoch': 4.99}
{'train_runtime': 35532.337, 'train_samples_per_second': 47.809, 'train_steps_per_second': 0.747, 'train_loss': 0.1767458845833013, 'epoch': 5.0}
# Run finetune.py with CUDA_VISIBLE_DEVICES=0 set for this single command only
# (the variable is not exported to the surrounding shell).
# Every --flag=value pair is handed to finetune.py unchanged; what each flag
# means is defined by that script, which is not shown here.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
    --base_model=openai/whisper-small \
    --language=None \
    --train_data='dataset/train.json' \
    --test_data='dataset/test.json' \
    --per_device_train_batch_size=64 \
    --per_device_eval_batch_size=64 \
    --gradient_accumulation_steps=1 \
    --learning_rate=1e-3 \
    --num_train_epochs=5 \
    --min_audio_len=1 \
    --max_audio_len=18 \
    --output_dir=output
- Downloads last month: 2