Bruno7
/

ksa-whisper-model

+{
+  "phase": 2,
+  "total_samples": 39857,
+  "saudi_samples": 11578,
+  "saudi_percentage": 29.0488496374539,
+  "estimated_cost": 13.275,
+  "final_wer": 141.16402945323517,
+  "config": {
+    "model_name": "openai/whisper-large-v3-turbo",
+    "max_duration_in_seconds": 30.0,
+    "min_duration_in_seconds": 0.5,
+    "lora_r": 64,
+    "lora_alpha": 128,
+    "lora_dropout": 0.1,
+    "target_modules": [
+      "q_proj",
+      "k_proj",
+      "v_proj",
+      "out_proj",
+      "fc1",
+      "fc2"
+    ],
+    "per_device_train_batch_size": 32,
+    "per_device_eval_batch_size": 2,
+    "gradient_accumulation_steps": 1,
+    "num_train_epochs": 1.0,
+    "learning_rate": 0.0001,
+    "warmup_ratio": 0.1,
+    "weight_decay": 0.01,
+    "dataloader_num_workers": 8,
+    "dataloader_pin_memory": true,
+    "gradient_checkpointing": true,
+    "fp16": true,
+    "fp16_full_eval": true,
+    "eval_steps": 200,
+    "save_steps": 200,
+    "logging_steps": 50,
+    "saudi_oversample_factor": 2.0,
+    "output_dir": "./whisper-arabic-saudi",
+    "hub_model_id": "whisper-large-v3-turbo-arabic-saudi",
+    "hf_token": "[REDACTED_TOKEN]",
+    "phase1_hours": 10,
+    "phase2_hours": 200,
+    "sada_local_dir": "/data/sada_audio_files/"
+  },
+  "_security_notice": {
+    "message": "Sensitive credentials have been automatically redacted for security",
+    "redacted_patterns": [
+      "huggingface_tokens",
+      "api_keys",
+      "access_tokens"
+    ],
+    "sanitized_at": "2025-06-11T10:33:24Z"
+  }
+}