File size: 1,478 Bytes
467379f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
{
  "phase": 2,
  "total_samples": 39857,
  "saudi_samples": 11578,
  "saudi_percentage": 29.0488496374539,
  "estimated_cost": 13.275,
  "final_wer": 141.16402945323517,
  "config": {
    "model_name": "openai/whisper-large-v3-turbo",
    "max_duration_in_seconds": 30.0,
    "min_duration_in_seconds": 0.5,
    "lora_r": 64,
    "lora_alpha": 128,
    "lora_dropout": 0.1,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "out_proj",
      "fc1",
      "fc2"
    ],
    "per_device_train_batch_size": 32,
    "per_device_eval_batch_size": 2,
    "gradient_accumulation_steps": 1,
    "num_train_epochs": 1.0,
    "learning_rate": 0.0001,
    "warmup_ratio": 0.1,
    "weight_decay": 0.01,
    "dataloader_num_workers": 8,
    "dataloader_pin_memory": true,
    "gradient_checkpointing": true,
    "fp16": true,
    "fp16_full_eval": true,
    "eval_steps": 200,
    "save_steps": 200,
    "logging_steps": 50,
    "saudi_oversample_factor": 2.0,
    "output_dir": "./whisper-arabic-saudi",
    "hub_model_id": "whisper-large-v3-turbo-arabic-saudi",
    "hf_token": "[REDACTED_TOKEN]",
    "phase1_hours": 10,
    "phase2_hours": 200,
    "sada_local_dir": "/data/sada_audio_files/"
  },
  "_security_notice": {
    "message": "Sensitive credentials have been automatically redacted for security",
    "redacted_patterns": [
      "huggingface_tokens",
      "api_keys",
      "access_tokens"
    ],
    "sanitized_at": "2025-06-11T10:33:24Z"
  }
}