# Launches run_flax_speech_recognition_seq2seq_streaming.py for a short run:
# model openai/whisper-tiny, dataset NbAiLab/NCC_S3_nrk, 500 training steps,
# evaluation every 50 steps, output written to ../whisper-tiny-nccs-test.
#
# The Python script is referenced by a relative path, so run this from the
# directory that contains it.

# The Hugging Face tokenizers library reads TOKENIZERS_PARALLELISM. Mind the
# spelling: a misspelled variable name is exported without error and then
# silently ignored.
export TOKENIZERS_PARALLELISM=true

# Each continuation backslash is preceded by a space and is the last character
# on its line. Without the space, backslash-newline removal can fuse an
# argument with the option that follows it.
python run_flax_speech_recognition_seq2seq_streaming.py \
  --model_name_or_path openai/whisper-tiny \
  --dataset_name NbAiLab/NCC_S3_nrk \
  --language Norwegian \
  --text_column_name text \
  --train_split_name train \
  --eval_split_name test \
  --output_dir ../whisper-tiny-nccs-test \
  --overwrite_output_dir \
  --warmup_steps 20 \
  --do_train \
  --do_eval \
  --num_train_steps 500 \
  --eval_steps 50 \
  --learning_rate=1.5e-3 \
  --per_device_train_batch_size=64 \
  --per_device_eval_batch_size=4 \
  --predict_with_generate \
  --number_write_predictions=100 \
  --streaming=True \
  --push_to_hub
|