realruneet committed on
Commit
ca67ec2
·
verified ·
1 Parent(s): ff87627

Update config/config.yaml

Browse files
Files changed (1) hide show
  1. config/config.yaml +62 -67
config/config.yaml CHANGED
@@ -1,11 +1,12 @@
1
  # ============================================
2
- # IndicGuard STABLE - NO COLLAPSE
3
- # FIXES: Epoch 19 NaN collapse
4
- # Strategy: Ultra-conservative to prevent explosions
 
5
  # ============================================
6
 
7
  project:
8
- name: "IndicGuard_Final"
9
  seed: 42
10
  base_dir: "/home/council/voice_detection"
11
 
@@ -81,8 +82,8 @@ model:
81
  state_dim: 64
82
  conv_dim: 4
83
  expand_factor: 2
84
- dropout: 0.2
85
- stochastic_depth_prob: 0.1
86
 
87
  liquid:
88
  input_dim: 512
@@ -91,7 +92,7 @@ model:
91
  tau_max: 10.0
92
  dt: 0.01
93
  num_steps: 2
94
- dropout: 0.2
95
 
96
  kan:
97
  input_dim: 256
@@ -99,127 +100,121 @@ model:
99
  output_dim: 2
100
  grid_size: 7
101
  spline_order: 3
102
- dropout: 0.2
103
 
104
  training:
105
- batch_size: 32 # CRITICAL: 64 → 32 (more stable gradients)
106
- epochs: 70 # Increased back to 80 (we have time now)
107
  accumulate_grad_batches: 1
108
 
109
- label_smoothing: 0.1
110
- gradient_clip: 0.5 # CRITICAL: 1.0 → 0.5 (clip earlier!)
111
- warmup_epochs: 5 # CRITICAL: 3 → 5 (slower warmup)
112
 
113
- # Add gradient clipping per parameter
114
- max_grad_norm: 0.5 # CRITICAL: Additional safety
115
 
116
  early_stopping:
117
  enabled: true
118
- patience: 15
119
  min_delta: 0.001
120
- monitor: "val_eer"
121
 
122
  mixup:
123
- enabled: false
124
- alpha: 0.2
125
- prob: 0.0
126
 
127
- dropout_rate: 0.2
128
  batch_norm_momentum: 0.1
129
  batch_norm_eps: 1.0e-5
130
 
131
  optimizer:
132
  type: "AdamW"
133
- learning_rate: 0.00001 # CRITICAL: 0.00015 → 0.00001 (15x LOWER!)
134
- weight_decay: 0.01 # CRITICAL: 0.02 → 0.01 (less aggressive)
135
  betas: [0.9, 0.999]
136
  eps: 1.0e-8
137
- amsgrad: true # CRITICAL: More stable variant of Adam
138
 
139
  scheduler:
140
- type: "ReduceLROnPlateau" # CRITICAL: Changed from CosineAnnealing
141
- mode: "min"
142
- factor: 0.5 # Reduce LR by 50% when stuck
143
- patience: 5 # Wait 5 epochs before reducing
144
- min_lr: 1.0e-7
145
- threshold: 0.001
146
-
147
- # MINIMAL AUGMENTATION (only safe ones)
148
  augmentation:
149
  codec_simulation:
150
  enabled: true
151
- prob: 0.5
152
 
153
  noise_injection:
154
  enabled: true
155
- snr_db_range: [15, 30] # CRITICAL: [10,30] → [15,30] (less aggressive)
156
- prob: 0.2 # CRITICAL: 0.3 → 0.2 (less often)
157
 
158
  time_stretch:
159
- enabled: false
160
- rate_range: [0.95, 1.05]
161
- prob: 0.0
162
 
163
  pitch_shift:
164
- enabled: false
165
- semitone_range: [-1, 1]
166
- prob: 0.0
167
 
168
  freq_mask:
169
  enabled: true
170
- num_masks: 1
171
- freq_mask_param: 8 # CRITICAL: 10 → 8 (less aggressive)
172
- prob: 0.2 # CRITICAL: 0.3 → 0.2
173
 
174
  time_mask:
175
  enabled: true
176
- num_masks: 1
177
- time_mask_param: 12 # CRITICAL: 15 → 12 (less aggressive)
178
- prob: 0.2 # CRITICAL: 0.3 → 0.2
179
 
180
  random_gain:
181
  enabled: true
182
- min_gain_db: -2 # CRITICAL: -3 → -2 (less extreme)
183
- max_gain_db: 2 # CRITICAL: 3 → 2
184
- prob: 0.15 # CRITICAL: 0.2 → 0.15
185
 
186
  hardware:
187
  device: "cuda"
188
- num_workers: 12
189
  pin_memory: true
190
  persistent_workers: true
191
- prefetch_factor: 8
192
- use_amp: false # CRITICAL: DISABLED AMP - can cause NaN
193
- amp_dtype: "float32" # CRITICAL: Use full precision
194
  gradient_checkpointing: false
195
- empty_cache_freq: 100
196
 
197
- # Add NaN checking
198
- detect_anomaly: true # CRITICAL: PyTorch anomaly detection
199
 
200
  paths:
201
- checkpoints: "./checkpoints_stable" # New directory
202
  logs: "./logs_stable"
203
  cache: "./cache"
204
 
205
  logging:
206
  log_dir: "./logs_stable"
207
- experiment_name: "indicguard_stable"
208
- log_every_n_steps: 10
209
-
210
- # Log gradient norms to detect explosions
211
  log_grad_norms: true
212
 
213
  evaluation:
214
  eer_threshold: 0.06
215
  monitor_overfitting: true
216
- overfitting_threshold: 0.03 # More lenient
217
  save_best_eer: true
218
  save_best_auc: true
219
  save_last: true
220
- val_every_n_epochs: 2
221
  test_at_end: true
222
  test_best_checkpoint: true
223
-
224
- # Add validation checks
225
- check_nan: true # CRITICAL: Stop if NaN detected
 
1
  # ============================================
2
+ # HACKATHON EMERGENCY - 90 MINUTE BLITZ
3
+ # Target: Train EER 14.6% -> <6%
4
+ # Current: Test EER 2.67% (EXCELLENT!)
5
+ # Strategy: Fix underfitting while preserving generalization
6
  # ============================================
7
 
8
  project:
9
+ name: "IndicGuard_Hackathon_Final"
10
  seed: 42
11
  base_dir: "/home/council/voice_detection"
12
 
 
82
  state_dim: 64
83
  conv_dim: 4
84
  expand_factor: 2
85
+ dropout: 0.15 # REDUCED: 0.2 -> 0.15 (less regularization for training)
86
+ stochastic_depth_prob: 0.05 # REDUCED: 0.1 -> 0.05
87
 
88
  liquid:
89
  input_dim: 512
 
92
  tau_max: 10.0
93
  dt: 0.01
94
  num_steps: 2
95
+ dropout: 0.1 # REDUCED: 0.2 -> 0.1
96
 
97
  kan:
98
  input_dim: 256
 
100
  output_dim: 2
101
  grid_size: 7
102
  spline_order: 3
103
+ dropout: 0.1 # REDUCED: 0.2 -> 0.1
104
 
105
  training:
106
+ batch_size: 48 # INCREASED: 32 -> 48 (better gradient estimates)
107
+ epochs: 25 # REDUCED: 70 -> 25 (90min window)
108
  accumulate_grad_batches: 1
109
 
110
+ label_smoothing: 0.05 # REDUCED: 0.1 -> 0.05 (let model be more confident)
111
+ gradient_clip: 1.0 # INCREASED: 0.5 -> 1.0 (allow bigger updates)
112
+ warmup_epochs: 2 # REDUCED: 5 -> 2 (faster ramp-up)
113
 
114
+ max_grad_norm: 1.0 # INCREASED: 0.5 -> 1.0
 
115
 
116
  early_stopping:
117
  enabled: true
118
+ patience: 8 # REDUCED: 15 -> 8 (faster decisions)
119
  min_delta: 0.001
120
+ monitor: "train_eer" # CRITICAL: Monitor TRAIN not VAL!
121
 
122
  mixup:
123
+ enabled: true # ENABLED! Helps with training fit
124
+ alpha: 0.3 # Moderate mixup
125
+ prob: 0.3 # 30% of batches
126
 
127
+ dropout_rate: 0.1 # REDUCED: 0.2 -> 0.1
128
  batch_norm_momentum: 0.1
129
  batch_norm_eps: 1.0e-5
130
 
131
  optimizer:
132
  type: "AdamW"
133
+ learning_rate: 0.0003 # INCREASED: 0.00001 -> 0.0003 (30x higher!)
134
+ weight_decay: 0.005 # REDUCED: 0.01 -> 0.005 (less weight penalty)
135
  betas: [0.9, 0.999]
136
  eps: 1.0e-8
137
+ amsgrad: true
138
 
139
  scheduler:
140
+ type: "OneCycleLR" # CHANGED: Fast convergence scheduler
141
+ max_lr: 0.0003
142
+ pct_start: 0.15 # Quick warmup (15% of training)
143
+ div_factor: 10.0 # Start at max_lr/10
144
+ final_div_factor: 100.0 # End at max_lr/100
145
+ anneal_strategy: "cos"
146
+
147
+ # AGGRESSIVE AUGMENTATION (Help training fit)
148
  augmentation:
149
  codec_simulation:
150
  enabled: true
151
+ prob: 0.7 # INCREASED: 0.5 -> 0.7
152
 
153
  noise_injection:
154
  enabled: true
155
+ snr_db_range: [10, 35] # WIDER: [15,30] -> [10,35]
156
+ prob: 0.4 # INCREASED: 0.2 -> 0.4
157
 
158
  time_stretch:
159
+ enabled: true # ENABLED!
160
+ rate_range: [0.9, 1.1]
161
+ prob: 0.3
162
 
163
  pitch_shift:
164
+ enabled: true # ENABLED!
165
+ semitone_range: [-2, 2]
166
+ prob: 0.3
167
 
168
  freq_mask:
169
  enabled: true
170
+ num_masks: 2 # INCREASED: 1 -> 2
171
+ freq_mask_param: 12 # INCREASED: 8 -> 12
172
+ prob: 0.4 # INCREASED: 0.2 -> 0.4
173
 
174
  time_mask:
175
  enabled: true
176
+ num_masks: 2 # INCREASED: 1 -> 2
177
+ time_mask_param: 20 # INCREASED: 12 -> 20
178
+ prob: 0.4 # INCREASED: 0.2 -> 0.4
179
 
180
  random_gain:
181
  enabled: true
182
+ min_gain_db: -4 # INCREASED: -2 -> -4
183
+ max_gain_db: 4 # INCREASED: 2 -> 4
184
+ prob: 0.3 # INCREASED: 0.15 -> 0.3
185
 
186
  hardware:
187
  device: "cuda"
188
+ num_workers: 4 # CHANGED: 12 -> 4 (prior note said 16; confirm intended worker count)
189
  pin_memory: true
190
  persistent_workers: true
191
+ prefetch_factor: 4 # REDUCED: 8 -> 4 (less memory, more stable)
192
+ use_amp: true # ENABLED! Mixed precision for speed
193
+ amp_dtype: "bfloat16" # CHANGED: float32 -> bfloat16 (RTX 50-series optimal)
194
  gradient_checkpointing: false
195
+ empty_cache_freq: 50 # REDUCED: 100 -> 50 (more frequent cleanup)
196
 
197
+ detect_anomaly: false # DISABLED: Too slow for hackathon
 
198
 
199
  paths:
200
+ checkpoints: "./checkpoints_stable"
201
  logs: "./logs_stable"
202
  cache: "./cache"
203
 
204
  logging:
205
  log_dir: "./logs_stable"
206
+ experiment_name: "indicguard_stable_final"
207
+ log_every_n_steps: 5 # REDUCED: 10 -> 5 (more frequent updates)
 
 
208
  log_grad_norms: true
209
 
210
  evaluation:
211
  eer_threshold: 0.06
212
  monitor_overfitting: true
213
+ overfitting_threshold: 0.05 # INCREASED: 0.03 -> 0.05 (more lenient)
214
  save_best_eer: true
215
  save_best_auc: true
216
  save_last: true
217
+ val_every_n_epochs: 1 # REDUCED: 2 -> 1 (check every epoch)
218
  test_at_end: true
219
  test_best_checkpoint: true
220
+ check_nan: true