#!/usr/bin/env bash
#
# Launches run_flax_speech_recognition_seq2seq.py with a small, fixed set of
# command-line arguments (2 epochs, 64 training samples, 10 eval samples).
#
# The arguments live in a bash array, one token per element, so that no
# backslash line continuation is needed. A backslash followed by a space
# (instead of a newline) escapes the space and glues it onto the next word,
# which silently corrupts the argument list.
#
# Requires bash (arrays are not available in POSIX sh).

train_args=(
  # Model.
  # NOTE(review): the checkpoint name ends in ".en" while the dataset config
  # below is "es" -- confirm this pairing is intentional.
  --model_name_or_path openai/whisper-small.en

  # Dataset.
  --dataset_name mozilla-foundation/common_voice_11_0
  --dataset_config es
  --text_column_name sentence
  # NOTE(review): training and evaluation both use the "test" split --
  # presumably to keep this run small; confirm.
  --train_split_name test
  --eval_split_name test

  # Output location.
  --output_dir whisper-small-flaxtest
  --overwrite_output_dir

  # Run size and schedule.
  --num_train_epochs=2
  --max_train_samples 64
  --max_eval_samples 10
  --warmup_steps=8

  # Phases to run.
  --do_train
  --do_eval

  # Optimisation and batching.
  --learning_rate=2e-4
  --per_device_train_batch_size=2
  --per_device_eval_batch_size=4
  --predict_with_generate
)

# Quoted "${train_args[@]}" expands to exactly one word per array element.
python run_flax_speech_recognition_seq2seq.py "${train_args[@]}"