File size: 1,478 Bytes
467379f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
{
"phase": 2,
"total_samples": 39857,
"saudi_samples": 11578,
"saudi_percentage": 29.0488496374539,
"estimated_cost": 13.275,
"final_wer": 141.16402945323517,
"config": {
"model_name": "openai/whisper-large-v3-turbo",
"max_duration_in_seconds": 30.0,
"min_duration_in_seconds": 0.5,
"lora_r": 64,
"lora_alpha": 128,
"lora_dropout": 0.1,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"out_proj",
"fc1",
"fc2"
],
"per_device_train_batch_size": 32,
"per_device_eval_batch_size": 2,
"gradient_accumulation_steps": 1,
"num_train_epochs": 1.0,
"learning_rate": 0.0001,
"warmup_ratio": 0.1,
"weight_decay": 0.01,
"dataloader_num_workers": 8,
"dataloader_pin_memory": true,
"gradient_checkpointing": true,
"fp16": true,
"fp16_full_eval": true,
"eval_steps": 200,
"save_steps": 200,
"logging_steps": 50,
"saudi_oversample_factor": 2.0,
"output_dir": "./whisper-arabic-saudi",
"hub_model_id": "whisper-large-v3-turbo-arabic-saudi",
"hf_token": "[REDACTED_TOKEN]",
"phase1_hours": 10,
"phase2_hours": 200,
"sada_local_dir": "/data/sada_audio_files/"
},
"_security_notice": {
"message": "Sensitive credentials have been automatically redacted for security",
"redacted_patterns": [
"huggingface_tokens",
"api_keys",
"access_tokens"
],
"sanitized_at": "2025-06-11T10:33:24Z"
}
} |