Spaces:

DhruvB1906
/

StrokeMitra-API

Sleeping

App Files Files Community

DhruvB1906 committed on Mar 21

Commit

f468cc8

verified ·

1 Parent(s): 4e9a3bc

Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

training/augmentation.py +60 -0
training/calibrate.py +419 -0
training/dataset.py +202 -0
training/evaluate.py +444 -0
training/train_cnn_bilstm.py +379 -0
training/train_ensemble_weights.py +376 -0
training/train_hubert_fast.py +297 -0
training/train_hubert_salr.py +225 -0

training/augmentation.py ADDED Viewed

	@@ -0,0 +1,60 @@

+"""Audio data augmentation for training."""
+import numpy as np
+import librosa
+class AudioAugmenter:
+    """Randomly perturb raw audio waveforms to diversify training data.
+    Three augmentations are available: time stretching and pitch shifting
+    (both delegated to librosa) and additive Gaussian noise. On every call to
+    ``augment`` each one is applied independently with probability
+    ``apply_prob``.
+    """
+    def __init__(
+        self,
+        time_stretch_range=(0.8, 1.2),
+        pitch_shift_range=(-2, 2),
+        noise_level_range=(0.005, 0.015),
+        apply_prob=0.5,
+    ):
+        """
+        Initialize augmenter.
+        Args:
+            time_stretch_range: (min_rate, max_rate) for time stretching
+            pitch_shift_range: (min_steps, max_steps) for pitch shifting
+            noise_level_range: (min_level, max_level) for additive noise
+            apply_prob: Probability of applying each augmentation
+        """
+        self.time_stretch_range = time_stretch_range
+        self.pitch_shift_range = pitch_shift_range
+        self.noise_level_range = noise_level_range
+        self.apply_prob = apply_prob
+    def augment(self, waveform: np.ndarray, sr: int) -> np.ndarray:
+        """
+        Apply random augmentations.
+        Args:
+            waveform: Audio waveform
+            sr: Sample rate
+        Returns:
+            Augmented waveform. The input object itself is returned when no
+            augmentation fires.
+        """
+        # Time stretching
+        if np.random.rand() < self.apply_prob:
+            rate = np.random.uniform(*self.time_stretch_range)
+            waveform = librosa.effects.time_stretch(waveform, rate=rate)
+        # Pitch shifting
+        if np.random.rand() < self.apply_prob:
+            n_steps = np.random.uniform(*self.pitch_shift_range)
+            waveform = librosa.effects.pitch_shift(waveform, sr=sr, n_steps=n_steps)
+        # Additive noise
+        if np.random.rand() < self.apply_prob:
+            waveform = self._add_noise(waveform)
+        # SpecAugment (applied at spectrogram level, not here)
+        return waveform
+    def _add_noise(self, waveform: np.ndarray) -> np.ndarray:
+        """Return ``waveform`` plus Gaussian noise scaled by a random level."""
+        noise_level = np.random.uniform(*self.noise_level_range)
+        # Draw noise with the full shape of the signal so multi-channel audio
+        # gets one noise value per sample instead of failing to broadcast.
+        noise = np.random.randn(*waveform.shape) * noise_level
+        # Keep float32 audio as float32; randn always produces float64.
+        if np.issubdtype(waveform.dtype, np.floating):
+            noise = noise.astype(waveform.dtype, copy=False)
+        return waveform + noise

training/calibrate.py ADDED Viewed

	@@ -0,0 +1,419 @@

+"""
+Calibrate model probabilities using Platt scaling.
+This script:
+1. Loads the ensemble model
+2. Collects predictions on a held-out calibration set
+3. Fits Platt scaling parameters (a, b) via logistic regression
+4. Evaluates calibration quality (ECE, reliability diagrams)
+Usage:
+    python training/calibrate.py
+"""
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+import mlflow
+import numpy as np
+from tqdm import tqdm
+import yaml
+import logging
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import brier_score_loss, log_loss
+import matplotlib.pyplot as plt
+from training.dataset import DysarthriaDataset
+from training.train_hubert_salr import HuBERTSALRModel
+from training.train_cnn_bilstm import CNNBiLSTMTransformer
+# ══════════════════════════════════════════════════════════════════════════════
+# Calibration Metrics
+# ══════════════════════════════════════════════════════════════════════════════
+def expected_calibration_error(y_true, y_prob, n_bins=10):
+    """
+    Compute Expected Calibration Error (ECE).
+    Predictions are grouped into ``n_bins`` equal-width probability bins. For
+    each populated bin the absolute gap between the mean predicted probability
+    and the observed fraction of positives is weighted by the share of samples
+    in the bin. Lower ECE indicates better calibration.
+    Args:
+        y_true: True labels (0 or 1); any array-like, (n,) or (n, 1)
+        y_prob: Predicted probabilities (0 to 1); any array-like
+        n_bins: Number of bins for binning predictions
+    Returns:
+        ECE value (0.0 for empty input)
+    """
+    # Coerce to flat arrays so plain lists and column vectors can be masked.
+    y_true = np.asarray(y_true, dtype=float).reshape(-1)
+    y_prob = np.asarray(y_prob, dtype=float).reshape(-1)
+    if y_true.size == 0:
+        return 0.0
+    bin_edges = np.linspace(0, 1, n_bins + 1)
+    # digitize against the left edges gives 1-based bin numbers; the clip
+    # folds a probability of exactly 1.0 into the last bin.
+    bin_indices = np.digitize(y_prob, bin_edges[:-1]) - 1
+    bin_indices = np.clip(bin_indices, 0, n_bins - 1)
+    ece = 0.0
+    for i in range(n_bins):
+        mask = bin_indices == i
+        if mask.sum() > 0:
+            bin_acc = y_true[mask].mean()
+            bin_conf = y_prob[mask].mean()
+            bin_weight = mask.sum() / len(y_true)
+            ece += bin_weight * np.abs(bin_acc - bin_conf)
+    return ece
+def reliability_curve(y_true, y_prob, n_bins=10):
+    """
+    Summarise predictions per probability bin for a reliability diagram.
+    Only bins that contain at least one prediction are reported.
+    Returns:
+        bin_centers, bin_accuracies, bin_confidences, bin_counts
+    """
+    edges = np.linspace(0, 1, n_bins + 1)
+    which_bin = np.clip(np.digitize(y_prob, edges[:-1]) - 1, 0, n_bins - 1)
+    rows = []
+    for k in range(n_bins):
+        members = which_bin == k
+        n_members = members.sum()
+        if not n_members > 0:
+            continue
+        rows.append(
+            (
+                (edges[k] + edges[k + 1]) / 2,
+                y_true[members].mean(),
+                y_prob[members].mean(),
+                n_members,
+            )
+        )
+    # Transpose the per-bin rows into one list per reported quantity.
+    centers, accuracies, confidences, counts = (
+        [row[col] for row in rows] for col in range(4)
+    )
+    return (
+        np.array(centers),
+        np.array(accuracies),
+        np.array(confidences),
+        np.array(counts),
+    )
+# ══════════════════════════════════════════════════════════════════════════════
+# Model Inference
+# ══════════════════════════════════════════════════════════════════════════════
+def collect_predictions(hubert_model, cnn_model, dataloader, alpha, device):
+    """
+    Collect raw logits and probabilities from ensemble.
+    Both models are switched to eval mode and run without gradient tracking.
+    Their logits are mixed as ``alpha * hubert + (1 - alpha) * cnn``.
+    Args:
+        hubert_model: HuBERT-SALR model
+        cnn_model: CNN-BiLSTM model
+        dataloader: Data loader yielding dicts with "waveform", "spectrogram"
+            and "label" entries
+        alpha: Ensemble mixing weight
+        device: torch device
+    Returns:
+        logits, probabilities, true labels (all 1-D numpy arrays)
+    """
+    all_logits = []
+    all_probs = []
+    all_labels = []
+    hubert_model.eval()
+    cnn_model.eval()
+    with torch.no_grad():
+        for batch in tqdm(dataloader, desc="Collecting predictions"):
+            waveform = batch["waveform"].to(device)
+            spectrogram = batch["spectrogram"].to(device)
+            # The dataset emits each label as a one-element tensor, so a batch
+            # arrives as (B, 1). Flatten it so the returned labels line up with
+            # the 1-D logits/probabilities when used for masking.
+            labels = batch["label"].reshape(-1)
+            # Ensemble logits
+            hubert_logits = hubert_model(waveform)
+            cnn_logits = cnn_model(spectrogram)
+            ensemble_logits = alpha * hubert_logits + (1 - alpha) * cnn_logits
+            # Probabilities (uncalibrated); column 1 is taken as the positive class
+            probs = torch.softmax(ensemble_logits, dim=1)[:, 1]
+            all_logits.extend(ensemble_logits[:, 1].cpu().numpy())
+            all_probs.extend(probs.cpu().numpy())
+            all_labels.extend(labels.cpu().numpy())
+    return (
+        np.array(all_logits),
+        np.array(all_probs),
+        np.array(all_labels),
+    )
+# ══════════════════════════════════════════════════════════════════════════════
+# Platt Scaling
+# ══════════════════════════════════════════════════════════════════════════════
+def fit_platt_scaling(logits, labels):
+    """
+    Fit Platt scaling parameters.
+    Platt scaling fits:
+        calibrated_prob = sigmoid(a * logit + b)
+    Args:
+        logits: Raw model logits (n_samples,); any array-like
+        labels: True binary labels, (n_samples,) or (n_samples, 1)
+    Returns:
+        a, b parameters
+    """
+    # sklearn wants a 2-D feature matrix and a 1-D target. Coercing here lets
+    # callers pass lists or column-vector labels.
+    X = np.asarray(logits, dtype=float).reshape(-1, 1)
+    y = np.asarray(labels).reshape(-1)
+    # Fit logistic regression (no regularization)
+    lr = LogisticRegression(penalty=None, solver="lbfgs", max_iter=1000)
+    lr.fit(X, y)
+    a = lr.coef_[0][0]
+    b = lr.intercept_[0]
+    return a, b
+def apply_platt_scaling(logits, a, b):
+    """
+    Apply Platt scaling to logits.
+    Args:
+        logits: Raw model logits; any array-like
+        a: Platt slope
+        b: Platt intercept
+    Returns:
+        sigmoid(a * logits + b) as a float array
+    """
+    z = a * np.asarray(logits, dtype=float) + b
+    # exp(-|z|) lies in (0, 1], so neither branch can overflow the way
+    # exp(-z) does for large negative z.
+    e = np.exp(-np.abs(z))
+    calibrated_probs = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
+    return calibrated_probs
+# ══════════════════════════════════════════════════════════════════════════════
+# Visualization
+# ══════════════════════════════════════════════════════════════════════════════
+def plot_reliability_diagram(
+    y_true,
+    y_prob_uncal,
+    y_prob_cal,
+    output_path: Path,
+):
+    """
+    Plot reliability diagram comparing uncalibrated vs calibrated.
+    Draws two side-by-side panels, one per probability set, and saves the
+    figure to ``output_path``.
+    Args:
+        y_true: True binary labels
+        y_prob_uncal: Predicted probabilities before calibration
+        y_prob_cal: Predicted probabilities after calibration
+        output_path: Destination image path
+    """
+    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
+    for ax, probs, title in zip(
+        axes,
+        [y_prob_uncal, y_prob_cal],
+        ["Uncalibrated", "Calibrated"],
+    ):
+        centers, accs, confs, counts = reliability_curve(y_true, probs, n_bins=10)
+        # Plot reliability curve
+        ax.plot([0, 1], [0, 1], "k--", label="Perfect calibration", linewidth=2)
+        # Marker area grows with the number of samples in the bin
+        ax.scatter(confs, accs, s=counts * 3, alpha=0.6, label="Model", zorder=5)
+        ax.plot(confs, accs, "o-", linewidth=2, markersize=8)
+        # Compute ECE and Brier score for the panel title
+        ece = expected_calibration_error(y_true, probs)
+        brier = brier_score_loss(y_true, probs)
+        ax.set_xlabel("Mean Predicted Probability", fontsize=12)
+        ax.set_ylabel("Fraction of Positives", fontsize=12)
+        ax.set_title(f"{title}\nECE: {ece:.4f}, Brier: {brier:.4f}", fontsize=14)
+        ax.legend(fontsize=10)
+        ax.grid(True, alpha=0.3)
+        ax.set_xlim([0, 1])
+        ax.set_ylim([0, 1])
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=300, bbox_inches="tight")
+    # Close the figure so it is not kept open after saving
+    plt.close()
+def plot_histogram_comparison(
+    y_true,
+    y_prob_uncal,
+    y_prob_cal,
+    output_path: Path,
+):
+    """
+    Plot histogram of predicted probabilities.
+    Produces a 2x2 grid: one row per probability set (uncalibrated, then
+    calibrated) and one column per true class (label 1, then label 0). The
+    figure is saved to ``output_path``.
+    Args:
+        y_true: True binary labels; assumed to be a 1-D array aligned with
+            the probability arrays, since it is used to build boolean masks
+            (TODO confirm at call sites)
+        y_prob_uncal: Predicted probabilities before calibration
+        y_prob_cal: Predicted probabilities after calibration
+        output_path: Destination image path
+    """
+    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+    # Split by true label
+    mask_positive = y_true == 1
+    mask_negative = y_true == 0
+    for i, (probs, title) in enumerate(
+        [(y_prob_uncal, "Uncalibrated"), (y_prob_cal, "Calibrated")]
+    ):
+        # Positive class
+        axes[i, 0].hist(probs[mask_positive], bins=20, alpha=0.7, color="red", edgecolor="black")
+        axes[i, 0].set_xlabel("Predicted Probability", fontsize=12)
+        axes[i, 0].set_ylabel("Count", fontsize=12)
+        axes[i, 0].set_title(f"{title} - True Dysarthric", fontsize=14)
+        axes[i, 0].grid(True, alpha=0.3)
+        # Negative class
+        axes[i, 1].hist(probs[mask_negative], bins=20, alpha=0.7, color="blue", edgecolor="black")
+        axes[i, 1].set_xlabel("Predicted Probability", fontsize=12)
+        axes[i, 1].set_ylabel("Count", fontsize=12)
+        axes[i, 1].set_title(f"{title} - True Healthy", fontsize=14)
+        axes[i, 1].grid(True, alpha=0.3)
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=300, bbox_inches="tight")
+    # Close the figure so it is not kept open after saving
+    plt.close()
+# ══════════════════════════════════════════════════════════════════════════════
+# Main
+# ══════════════════════════════════════════════════════════════════════════════
+def main():
+    """
+    Fit Platt scaling for the ensemble and report calibration quality.
+    Loads both model checkpoints and the ensemble weight, collects ensemble
+    logits on the validation manifest, fits the Platt parameters, logs
+    metrics to MLflow and writes the parameters and plots under
+    ``reports/calibration``.
+    """
+    # Imported locally: the module-level import only brings in the dataset class.
+    from training.dataset import collate_fn
+    logging.basicConfig(level=logging.INFO)
+    logger = logging.getLogger(__name__)
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    logger.info(f"Using device: {device}")
+    # ── Load Models ───────────────────────────────────────────────────────────
+    logger.info("Loading models...")
+    hubert_checkpoint = Path("models/hubert_salr_best.pt")
+    cnn_checkpoint = Path("models/cnn_bilstm_best.pt")
+    hubert_model = HuBERTSALRModel()
+    hubert_model.load_state_dict(torch.load(hubert_checkpoint, map_location=device)["model_state_dict"])
+    hubert_model.to(device)
+    cnn_model = CNNBiLSTMTransformer()
+    cnn_model.load_state_dict(torch.load(cnn_checkpoint, map_location=device)["model_state_dict"])
+    cnn_model.to(device)
+    # Load optimal alpha
+    with open("configs/model_config.yaml") as f:
+        config = yaml.safe_load(f)
+    alpha = config.get("ensemble", {}).get("alpha", 0.6)
+    logger.info(f"Using ensemble alpha: {alpha}")
+    # ── Load Calibration Data (use validation set) ────────────────────────────
+    val_manifest = Path("data/manifests/val.csv")
+    # DysarthriaDataset accepts (manifest_path, augment, cache_features); the
+    # calibration set must not be augmented.
+    val_dataset = DysarthriaDataset(val_manifest, augment=False)
+    # collate_fn pads waveforms and spectrograms to a common length per batch.
+    val_loader = DataLoader(
+        val_dataset,
+        batch_size=16,
+        shuffle=False,
+        num_workers=4,
+        collate_fn=collate_fn,
+    )
+    logger.info(f"Calibration samples: {len(val_dataset)}")
+    # ── Collect Predictions ───────────────────────────────────────────────────
+    logger.info("Collecting predictions...")
+    logits, probs_uncal, labels = collect_predictions(
+        hubert_model, cnn_model, val_loader, alpha, device
+    )
+    # Labels leave the dataset as one-element tensors; make sure they are 1-D
+    # before they are used as boolean masks in the plots below.
+    labels = np.asarray(labels).reshape(-1)
+    # ── Fit Platt Scaling ─────────────────────────────────────────────────────
+    mlflow.set_experiment("model_calibration")
+    with mlflow.start_run():
+        logger.info("\nFitting Platt scaling...")
+        a, b = fit_platt_scaling(logits, labels)
+        logger.info(f"Platt parameters: a={a:.6f}, b={b:.6f}")
+        # Apply calibration
+        probs_cal = apply_platt_scaling(logits, a, b)
+        # ── Evaluate Calibration ──────────────────────────────────────────────
+        # NOTE: the calibrated metrics are computed on the same samples the
+        # Platt parameters were fitted on, so they are in-sample figures.
+        ece_uncal = expected_calibration_error(labels, probs_uncal)
+        ece_cal = expected_calibration_error(labels, probs_cal)
+        brier_uncal = brier_score_loss(labels, probs_uncal)
+        brier_cal = brier_score_loss(labels, probs_cal)
+        logloss_uncal = log_loss(labels, probs_uncal)
+        logloss_cal = log_loss(labels, probs_cal)
+        logger.info("\n" + "=" * 80)
+        logger.info("CALIBRATION RESULTS")
+        logger.info("=" * 80)
+        logger.info(f"Expected Calibration Error (ECE):")
+        logger.info(f"  Uncalibrated: {ece_uncal:.4f}")
+        logger.info(f"  Calibrated:   {ece_cal:.4f} ({'↓' if ece_cal < ece_uncal else '↑'} {abs(ece_cal - ece_uncal):.4f})")
+        logger.info(f"\nBrier Score:")
+        logger.info(f"  Uncalibrated: {brier_uncal:.4f}")
+        logger.info(f"  Calibrated:   {brier_cal:.4f} ({'↓' if brier_cal < brier_uncal else '↑'} {abs(brier_cal - brier_uncal):.4f})")
+        logger.info(f"\nLog Loss:")
+        logger.info(f"  Uncalibrated: {logloss_uncal:.4f}")
+        logger.info(f"  Calibrated:   {logloss_cal:.4f} ({'↓' if logloss_cal < logloss_uncal else '↑'} {abs(logloss_cal - logloss_uncal):.4f})")
+        logger.info("=" * 80)
+        # Log to MLflow
+        mlflow.log_params({
+            "platt_a": a,
+            "platt_b": b,
+            "calibration_samples": len(labels),
+        })
+        mlflow.log_metrics({
+            "ece_uncalibrated": ece_uncal,
+            "ece_calibrated": ece_cal,
+            "ece_improvement": ece_uncal - ece_cal,
+            "brier_uncalibrated": brier_uncal,
+            "brier_calibrated": brier_cal,
+            "logloss_uncalibrated": logloss_uncal,
+            "logloss_calibrated": logloss_cal,
+        })
+        # ── Save Results ──────────────────────────────────────────────────────
+        output_dir = Path("reports/calibration")
+        output_dir.mkdir(parents=True, exist_ok=True)
+        # Save Platt parameters as plain floats so the YAML holds no numpy tags
+        calibration_config = {
+            "platt_scaling": {
+                "a": float(a),
+                "b": float(b),
+                "ece_uncalibrated": float(ece_uncal),
+                "ece_calibrated": float(ece_cal),
+                "brier_uncalibrated": float(brier_uncal),
+                "brier_calibrated": float(brier_cal),
+            }
+        }
+        config_path = output_dir / "calibration_params.yaml"
+        with open(config_path, "w") as f:
+            yaml.dump(calibration_config, f, default_flow_style=False)
+        mlflow.log_artifact(str(config_path))
+        logger.info(f"\n✓ Calibration parameters saved to {config_path}")
+        # Plot reliability diagram
+        reliability_path = output_dir / "reliability_diagram.png"
+        plot_reliability_diagram(labels, probs_uncal, probs_cal, reliability_path)
+        mlflow.log_artifact(str(reliability_path))
+        logger.info(f"✓ Reliability diagram saved to {reliability_path}")
+        # Plot histogram comparison
+        hist_path = output_dir / "probability_histograms.png"
+        plot_histogram_comparison(labels, probs_uncal, probs_cal, hist_path)
+        mlflow.log_artifact(str(hist_path))
+        logger.info(f"✓ Probability histograms saved to {hist_path}")
+        logger.info("\n✓ Calibration complete!")
+        logger.info(f"  Update configs/model_config.yaml with Platt parameters:")
+        logger.info(f"    a: {a:.6f}")
+        logger.info(f"    b: {b:.6f}")
+if __name__ == "__main__":
+    main()

training/dataset.py ADDED Viewed

	@@ -0,0 +1,202 @@

+"""PyTorch Dataset for dysarthria detection."""
+import logging
+import pandas as pd
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+from pathlib import Path
+from src.ingestion.audio_loader import AudioLoader
+from src.ingestion.preprocessor import AudioPreprocessor
+from src.features.mfcc_extractor import MFCCExtractor
+from src.features.prosodic_extractor import ProsodicExtractor
+from src.features.formant_extractor import FormantExtractor
+from src.features.egemaps_extractor import EGeMAPSExtractor
+from src.features.spectrogram_builder import SpectrogramBuilder
+from src.features.feature_fusion import FeatureFusion
+from src.features.schemas import FeatureBundle
+logger = logging.getLogger(__name__)
+class DysarthriaDataset(Dataset):
+    """Dataset for dysarthria detection with on-the-fly feature extraction.
+    Each item is loaded from the audio file named in the manifest,
+    preprocessed, optionally augmented, and converted into waveform,
+    spectrogram and fused acoustic-feature tensors.
+    """
+    def __init__(
+        self,
+        manifest_path: str | Path,
+        augment: bool = False,
+        cache_features: bool = False,
+    ):
+        """
+        Initialize dataset.
+        Args:
+            manifest_path: Path to CSV manifest with the columns read by
+                ``__getitem__``: file_path, label, speaker_id, duration
+            augment: Apply data augmentation
+            cache_features: Cache extracted features in memory. Ignored while
+                ``augment`` is enabled, because a cached item would replay one
+                fixed augmentation on every later access.
+        """
+        self.manifest = pd.read_csv(manifest_path)
+        self.augment = augment
+        self.cache_features = cache_features
+        self.feature_cache = {} if cache_features else None
+        # Built lazily on first use so non-augmenting datasets never import it
+        self._augmenter = None
+        # Initialize components
+        self.audio_loader = AudioLoader()
+        self.preprocessor = AudioPreprocessor(target_sr=16000)
+        self.mfcc_extractor = MFCCExtractor()
+        self.prosodic_extractor = ProsodicExtractor()
+        self.formant_extractor = FormantExtractor()
+        self.egemaps_extractor = EGeMAPSExtractor()
+        self.spectrogram_builder = SpectrogramBuilder()
+        self.feature_fusion = FeatureFusion()
+        logger.info(f"Dataset initialized: {len(self)} samples")
+    def __len__(self) -> int:
+        """Return the number of manifest rows."""
+        return len(self.manifest)
+    def __getitem__(self, idx: int) -> dict:
+        """
+        Get item by index.
+        If loading or feature extraction raises, the error is logged and a
+        zero-filled placeholder carrying the row's label is returned instead.
+        Returns:
+            dict with keys:
+                - waveform: torch.Tensor (samples,)
+                - spectrogram: torch.Tensor (2, freq, time)
+                - acoustic_features: torch.Tensor (n_features,)
+                - label: torch.Tensor (1,)
+                - speaker_id: str
+        """
+        # Augmented items must be recomputed on every access, so the cache is
+        # only consulted and filled when augmentation is off.
+        use_cache = self.cache_features and not self.augment
+        if use_cache and idx in self.feature_cache:
+            return self.feature_cache[idx]
+        # Load sample info
+        row = self.manifest.iloc[idx]
+        audio_path = row["file_path"]
+        label = int(row["label"])
+        speaker_id = row["speaker_id"]
+        try:
+            # Load and preprocess audio
+            audio_input, waveform = self.audio_loader.load(audio_path)
+            preprocessed = self.preprocessor.process(
+                waveform,
+                sr=audio_input.sample_rate,  # Use original SR
+                original_duration=row["duration"],
+            )
+            waveform = preprocessed.waveform
+            sr = preprocessed.sample_rate
+            # Apply augmentation if training
+            if self.augment:
+                waveform = self._apply_augmentation(waveform, sr)
+            # Extract features
+            mfcc = self.mfcc_extractor.extract(waveform, sr)
+            prosody = self.prosodic_extractor.extract(waveform, sr)
+            formants = self.formant_extractor.extract(waveform, sr)
+            egemaps = self.egemaps_extractor.extract(waveform, sr)
+            spectrogram = self.spectrogram_builder.build(waveform, sr)
+            # Create feature bundle
+            feature_bundle = FeatureBundle(
+                waveform=waveform,
+                sample_rate=sr,
+                duration_sec=preprocessed.duration_sec,
+                mfcc=mfcc,
+                prosody=prosody,
+                formants=formants,
+                egemaps=egemaps,
+                spectrogram=spectrogram,
+            )
+            # Fuse acoustic features
+            feature_bundle = self.feature_fusion.fuse(feature_bundle)
+            # Convert to tensors
+            item = {
+                "waveform": torch.from_numpy(waveform).float(),
+                "spectrogram": torch.from_numpy(spectrogram.stacked).float(),
+                "acoustic_features": torch.from_numpy(feature_bundle.fused_acoustic).float(),
+                "label": torch.tensor([label], dtype=torch.long),
+                "speaker_id": speaker_id,
+            }
+            # Cache if enabled
+            if use_cache:
+                self.feature_cache[idx] = item
+            return item
+        except Exception as e:
+            # Best-effort: keep the epoch running, but record the traceback so
+            # broken samples can be found and removed from the manifest.
+            logger.exception(f"Failed to load sample {idx} ({audio_path}): {e}")
+            # Return a dummy sample
+            return self._get_dummy_item(label, speaker_id)
+    def _apply_augmentation(self, waveform: np.ndarray, sr: int) -> np.ndarray:
+        """Apply data augmentation."""
+        augmenter = getattr(self, "_augmenter", None)
+        if augmenter is None:
+            # Deferred import; one augmenter is reused for all samples
+            from training.augmentation import AudioAugmenter
+            augmenter = self._augmenter = AudioAugmenter()
+        return augmenter.augment(waveform, sr)
+    def _get_dummy_item(self, label: int, speaker_id: str) -> dict:
+        """Return a dummy item when loading fails."""
+        # NOTE(review): these shapes are hard-coded; presumably they mirror a
+        # real preprocessed sample - confirm against the feature extractors.
+        return {
+            "waveform": torch.zeros(16000 * 10),  # 10 seconds of silence
+            "spectrogram": torch.zeros(2, 128, 313),
+            "acoustic_features": torch.zeros(145),
+            "label": torch.tensor([label], dtype=torch.long),
+            "speaker_id": speaker_id,
+        }
+def collate_fn(batch: list[dict]) -> dict:
+    """
+    Merge a list of dataset items into one batch dict.
+    Waveforms and spectrograms are zero-padded on the right of their last
+    dimension up to the longest item in the batch, then stacked. Acoustic
+    features and labels are stacked as they are; speaker ids stay a list.
+    """
+    pad = torch.nn.functional.pad
+    # Target lengths: longest waveform and widest spectrogram in the batch
+    longest_wave = max(sample["waveform"].shape[0] for sample in batch)
+    widest_spec = max(sample["spectrogram"].shape[2] for sample in batch)
+    padded_waves, padded_specs = [], []
+    for sample in batch:
+        wave = sample["waveform"]
+        missing = longest_wave - wave.shape[0]
+        if missing > 0:
+            wave = pad(wave, (0, missing))
+        padded_waves.append(wave)
+        spec = sample["spectrogram"]
+        missing = widest_spec - spec.shape[2]
+        if missing > 0:
+            spec = pad(spec, (0, missing))
+        padded_specs.append(spec)
+    return {
+        "waveform": torch.stack(padded_waves),
+        "spectrogram": torch.stack(padded_specs),
+        "acoustic_features": torch.stack([sample["acoustic_features"] for sample in batch]),
+        "label": torch.stack([sample["label"] for sample in batch]),
+        "speaker_id": [sample["speaker_id"] for sample in batch],
+    }

training/evaluate.py ADDED Viewed

	@@ -0,0 +1,444 @@

+"""
+Comprehensive model evaluation on test set.
+This script:
+1. Loads trained ensemble model with calibration
+2. Evaluates on held-out test set
+3. Computes classification metrics (accuracy, F1, AUC, sensitivity, specificity)
+4. Generates confusion matrix, ROC curve, PR curve
+5. Performs error analysis
+Usage:
+    python training/evaluate.py
+"""
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+import mlflow
+import numpy as np
+from tqdm import tqdm
+import yaml
+import pandas as pd
+import logging
+from sklearn.metrics import (
+    accuracy_score,
+    f1_score,
+    roc_auc_score,
+    confusion_matrix,
+    classification_report,
+    roc_curve,
+    precision_recall_curve,
+    average_precision_score,
+)
+import matplotlib.pyplot as plt
+import seaborn as sns
+from training.dataset import DysarthriaDataset
+from training.train_hubert_salr import HuBERTSALRModel
+from training.train_cnn_bilstm import CNNBiLSTMTransformer
+# ══════════════════════════════════════════════════════════════════════════════
+# Model Inference
+# ══════════════════════════════════════════════════════════════════════════════
+def evaluate_model(hubert_model, cnn_model, dataloader, alpha, platt_a, platt_b, device):
+    """
+    Evaluate calibrated ensemble on test set.
+    Both models run in eval mode without gradient tracking. Their logits are
+    mixed as ``alpha * hubert + (1 - alpha) * cnn``; the class-1 logit is
+    Platt-scaled and thresholded at 0.5.
+    Args:
+        hubert_model: HuBERT-SALR model
+        cnn_model: CNN-BiLSTM model
+        dataloader: Data loader yielding dicts with "waveform", "spectrogram"
+            and "label" entries, and optionally "file_path"
+        alpha: Ensemble mixing weight
+        platt_a: Platt slope
+        platt_b: Platt intercept
+        device: torch device
+    Returns:
+        predictions, probabilities, labels (1-D numpy arrays), file_paths (list)
+    """
+    all_preds = []
+    all_probs = []
+    all_labels = []
+    all_files = []
+    hubert_model.eval()
+    cnn_model.eval()
+    with torch.no_grad():
+        for batch in tqdm(dataloader, desc="Evaluating"):
+            waveform = batch["waveform"].to(device)
+            spectrogram = batch["spectrogram"].to(device)
+            # Labels may arrive as (B, 1); flatten so they align with the 1-D
+            # predictions when compared or used as masks downstream.
+            labels = batch["label"].reshape(-1)
+            # Batches without a "file_path" entry get empty-string placeholders
+            file_paths = batch.get("file_path", [""] * len(labels))
+            # Ensemble logits
+            hubert_logits = hubert_model(waveform)
+            cnn_logits = cnn_model(spectrogram)
+            ensemble_logits = alpha * hubert_logits + (1 - alpha) * cnn_logits
+            # Apply Platt scaling
+            raw_logits = ensemble_logits[:, 1].cpu().numpy()
+            z = platt_a * raw_logits + platt_b
+            # Stable sigmoid: exp(-|z|) cannot overflow for extreme logits
+            e = np.exp(-np.abs(z))
+            calibrated_probs = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
+            # Predictions
+            preds = (calibrated_probs > 0.5).astype(int)
+            all_preds.extend(preds)
+            all_probs.extend(calibrated_probs)
+            all_labels.extend(labels.cpu().numpy())
+            all_files.extend(file_paths)
+    return (
+        np.array(all_preds),
+        np.array(all_probs),
+        np.array(all_labels),
+        all_files,
+    )
+# ══════════════════════════════════════════════════════════════════════════════
+# Metrics Computation
+# ══════════════════════════════════════════════════════════════════════════════
+def compute_metrics(y_true, y_pred, y_prob):
+    """
+    Build a dict of binary-classification metrics.
+    Args:
+        y_true: True labels
+        y_pred: Hard predictions
+        y_prob: Predicted probabilities for the positive class
+    Returns:
+        dict with accuracy, f1, auc, average_precision, sensitivity,
+        specificity, ppv, npv, the four confusion counts and the raw
+        confusion matrix
+    """
+    def _rate(hits, total):
+        # A ratio with an empty denominator is reported as 0
+        return hits / total if total > 0 else 0
+    scores = {
+        "accuracy": accuracy_score(y_true, y_pred),
+        "f1": f1_score(y_true, y_pred, average="binary"),
+        "auc": roc_auc_score(y_true, y_prob),
+        "average_precision": average_precision_score(y_true, y_prob),
+    }
+    matrix = confusion_matrix(y_true, y_pred)
+    # Flattened row by row: true negatives, false positives, false negatives, true positives
+    tn, fp, fn, tp = matrix.ravel()
+    scores["sensitivity"] = _rate(tp, tp + fn)
+    scores["specificity"] = _rate(tn, tn + fp)
+    scores["ppv"] = _rate(tp, tp + fp)
+    scores["npv"] = _rate(tn, tn + fn)
+    scores.update(tp=int(tp), tn=int(tn), fp=int(fp), fn=int(fn))
+    scores["confusion_matrix"] = matrix
+    return scores
+# ══════════════════════════════════════════════════════════════════════════════
+# Visualization
+# ══════════════════════════════════════════════════════════════════════════════
+def plot_confusion_matrix(cm, output_path: Path):
+    """
+    Plot confusion matrix.
+    Renders ``cm`` as an annotated heatmap and saves it to ``output_path``.
+    Args:
+        cm: Confusion matrix of integer counts; rows are labelled as true
+            classes and columns as predicted classes
+        output_path: Destination image path
+    """
+    plt.figure(figsize=(8, 6))
+    sns.heatmap(
+        cm,
+        annot=True,
+        fmt="d",
+        cmap="Blues",
+        xticklabels=["Healthy", "Dysarthric"],
+        yticklabels=["Healthy", "Dysarthric"],
+        cbar_kws={"label": "Count"},
+    )
+    plt.title("Confusion Matrix - Test Set", fontsize=16, fontweight="bold")
+    plt.ylabel("True Label", fontsize=14)
+    plt.xlabel("Predicted Label", fontsize=14)
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=300, bbox_inches="tight")
+    # Close the figure so it is not kept open after saving
+    plt.close()
+def plot_roc_curve(y_true, y_prob, auc_score, output_path: Path):
+    """
+    Plot ROC curve.
+    Args:
+        y_true: True binary labels
+        y_prob: Predicted probabilities for the positive class
+        auc_score: Precomputed AUC, shown in the legend only
+        output_path: Destination image path
+    """
+    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
+    plt.figure(figsize=(8, 6))
+    plt.plot(fpr, tpr, linewidth=2, label=f"Model (AUC = {auc_score:.4f})")
+    # Diagonal reference line
+    plt.plot([0, 1], [0, 1], "k--", linewidth=1, label="Random Classifier")
+    plt.xlabel("False Positive Rate", fontsize=14)
+    plt.ylabel("True Positive Rate", fontsize=14)
+    plt.title("ROC Curve - Test Set", fontsize=16, fontweight="bold")
+    plt.legend(fontsize=12)
+    plt.grid(True, alpha=0.3)
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=300, bbox_inches="tight")
+    # Close the figure so it is not kept open after saving
+    plt.close()
+def plot_precision_recall_curve(y_true, y_prob, ap_score, output_path: Path):
+    """
+    Plot Precision-Recall curve.
+    Args:
+        y_true: True binary labels
+        y_prob: Predicted probabilities for the positive class
+        ap_score: Precomputed average precision, shown in the legend only
+        output_path: Destination image path
+    """
+    precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
+    plt.figure(figsize=(8, 6))
+    plt.plot(recall, precision, linewidth=2, label=f"Model (AP = {ap_score:.4f})")
+    plt.xlabel("Recall", fontsize=14)
+    plt.ylabel("Precision", fontsize=14)
+    plt.title("Precision-Recall Curve - Test Set", fontsize=16, fontweight="bold")
+    plt.legend(fontsize=12)
+    plt.grid(True, alpha=0.3)
+    plt.xlim([0, 1])
+    plt.ylim([0, 1])
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=300, bbox_inches="tight")
+    # Close the figure so it is not kept open after saving
+    plt.close()
+def plot_probability_distribution(y_true, y_prob, output_path: Path):
+    """
+    Plot distribution of predicted probabilities by class.
+    Overlays one histogram per true class and marks the 0.5 threshold.
+    Args:
+        y_true: True binary labels; assumed to be a 1-D array aligned with
+            ``y_prob``, since it is used to build boolean masks (TODO confirm
+            at call sites)
+        y_prob: Predicted probabilities for the positive class
+        output_path: Destination image path
+    """
+    plt.figure(figsize=(10, 6))
+    mask_positive = y_true == 1
+    mask_negative = y_true == 0
+    plt.hist(
+        y_prob[mask_negative],
+        bins=30,
+        alpha=0.6,
+        color="blue",
+        label="Healthy",
+        edgecolor="black",
+    )
+    plt.hist(
+        y_prob[mask_positive],
+        bins=30,
+        alpha=0.6,
+        color="red",
+        label="Dysarthric",
+        edgecolor="black",
+    )
+    plt.axvline(0.5, color="black", linestyle="--", linewidth=2, label="Decision Threshold")
+    plt.xlabel("Predicted Probability", fontsize=14)
+    plt.ylabel("Count", fontsize=14)
+    plt.title("Predicted Probability Distribution - Test Set", fontsize=16, fontweight="bold")
+    plt.legend(fontsize=12)
+    plt.grid(True, alpha=0.3, axis="y")
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=300, bbox_inches="tight")
+    # Close the figure so it is not kept open after saving
+    plt.close()
+# ══════════════════════════════════════════════════════════════════════════════
+# Error Analysis
+# ══════════════════════════════════════════════════════════════════════════════
+def perform_error_analysis(y_true, y_pred, y_prob, file_paths, output_path: Path):
+    """
+    Identify and save misclassified samples.
+    Args:
+        y_true: True binary labels
+        y_pred: Hard predictions
+        y_prob: Predicted probabilities for the positive class
+        file_paths: One identifier per sample, in the same order
+        output_path: Destination CSV path
+    Returns:
+        DataFrame with one row per misclassified sample, most confident
+        mistakes first. It is empty, but keeps all columns, when every
+        prediction is correct.
+    """
+    columns = [
+        "file_path",
+        "true_label",
+        "predicted_label",
+        "probability",
+        "confidence",
+        "error_type",
+    ]
+    errors = []
+    for true_label, pred_label, prob, file_path in zip(y_true, y_pred, y_prob, file_paths):
+        if true_label != pred_label:
+            error_type = "False Positive" if pred_label == 1 else "False Negative"
+            # Confidence is the probability assigned to the (wrong) predicted class
+            confidence = prob if pred_label == 1 else (1 - prob)
+            errors.append({
+                "file_path": file_path,
+                "true_label": "Dysarthric" if true_label == 1 else "Healthy",
+                "predicted_label": "Dysarthric" if pred_label == 1 else "Healthy",
+                "probability": prob,
+                "confidence": confidence,
+                "error_type": error_type,
+            })
+    # Naming the columns up front keeps "confidence" present even when there
+    # are no errors; otherwise the sort below raises KeyError.
+    errors_df = pd.DataFrame(errors, columns=columns)
+    errors_df = errors_df.sort_values("confidence", ascending=False)
+    errors_df.to_csv(output_path, index=False)
+    return errors_df
+# ══════════════════════════════════════════════════════════════════════════════
+# Main
+# ══════════════════════════════════════════════════════════════════════════════
+def main():
+    logging.basicConfig(level=logging.INFO)
+    logger = logging.getLogger(__name__)
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    logger.info(f"Using device: {device}")
+    # ──────────────────────────────────────────────────────────────────────────
+    # Load Configuration
+    # ──────────────────────────────────────────────────────────────────────────
+    with open("configs/model_config.yaml") as f:
+        config = yaml.safe_load(f)
+    alpha = config.get("ensemble", {}).get("alpha", 0.6)
+    # Load Platt scaling parameters
+    calibration_file = Path("reports/calibration/calibration_params.yaml")
+    if calibration_file.exists():
+        with open(calibration_file) as f:
+            cal_config = yaml.safe_load(f)
+        platt_a = cal_config["platt_scaling"]["a"]
+        platt_b = cal_config["platt_scaling"]["b"]
+        logger.info(f"Loaded Platt parameters: a={platt_a:.6f}, b={platt_b:.6f}")
+    else:
+        platt_a, platt_b = 1.0, 0.0
+        logger.warning("Calibration parameters not found, using identity mapping")
+    # ──────────────────────────────────────────────────────────────────────────
+    # Load Models
+    # ──────────────────────────────────────────────────────────────────────────
+    logger.info("Loading models...")
+    hubert_checkpoint = Path("models/hubert_salr_best.pt")
+    cnn_checkpoint = Path("models/cnn_bilstm_best.pt")
+    hubert_model = HuBERTSALRModel()
+    hubert_model.load_state_dict(torch.load(hubert_checkpoint, map_location=device)["model_state_dict"])
+    hubert_model.to(device)
+    cnn_model = CNNBiLSTMTransformer()
+    cnn_model.load_state_dict(torch.load(cnn_checkpoint, map_location=device)["model_state_dict"])
+    cnn_model.to(device)
+    # ──────────────────────────────────────────────────────────────────────────
+    # Load Test Data
+    # ──────────────────────────────────────────────────────────────────────────
+    test_manifest = Path("data/manifests/test.csv")
+    test_dataset = DysarthriaDataset(test_manifest, augmentor=None, mode="test")
+    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)
+    logger.info(f"Test samples: {len(test_dataset)}")
+    # ──────────────────────────────────────────────────────────────────────────
+    # Evaluate
+    # ──────────────────────────────────────────────────────────────────────────
+    mlflow.set_experiment("model_evaluation")
+    with mlflow.start_run():
+        logger.info("\nEvaluating on test set...")
+        y_pred, y_prob, y_true, file_paths = evaluate_model(
+            hubert_model, cnn_model, test_loader, alpha, platt_a, platt_b, device
+        )
+        # Compute metrics
+        metrics = compute_metrics(y_true, y_pred, y_prob)
+        # ──────────────────────────────────────────────────────────────────────
+        # Print Results
+        # ──────────────────────────────────────────────────────────────────────
+        logger.info("\n" + "=" * 80)
+        logger.info("TEST SET EVALUATION RESULTS")
+        logger.info("=" * 80)
+        logger.info(f"Accuracy:           {metrics['accuracy']:.4f}")
+        logger.info(f"F1 Score:           {metrics['f1']:.4f}")
+        logger.info(f"AUC-ROC:            {metrics['auc']:.4f}")
+        logger.info(f"Average Precision:  {metrics['average_precision']:.4f}")
+        logger.info(f"Sensitivity:        {metrics['sensitivity']:.4f}")
+        logger.info(f"Specificity:        {metrics['specificity']:.4f}")
+        logger.info(f"PPV:                {metrics['ppv']:.4f}")
+        logger.info(f"NPV:                {metrics['npv']:.4f}")
+        logger.info("")
+        logger.info("Confusion Matrix:")
+        logger.info(f"  True Negatives:   {metrics['tn']}")
+        logger.info(f"  False Positives:  {metrics['fp']}")
+        logger.info(f"  False Negatives:  {metrics['fn']}")
+        logger.info(f"  True Positives:   {metrics['tp']}")
+        logger.info("=" * 80)
+        # Log to MLflow
+        mlflow.log_params({
+            "ensemble_alpha": alpha,
+            "platt_a": platt_a,
+            "platt_b": platt_b,
+            "test_samples": len(y_true),
+        })
+        mlflow.log_metrics({
+            "test_accuracy": metrics["accuracy"],
+            "test_f1": metrics["f1"],
+            "test_auc": metrics["auc"],
+            "test_ap": metrics["average_precision"],
+            "test_sensitivity": metrics["sensitivity"],
+            "test_specificity": metrics["specificity"],
+            "test_ppv": metrics["ppv"],
+            "test_npv": metrics["npv"],
+        })
+        # ──────────────────────────────────────────────────────────────────────
+        # Save Results
+        # ──────────────────────────────────────────────────────────────────────
+        output_dir = Path("reports/evaluation")
+        output_dir.mkdir(parents=True, exist_ok=True)
+        # Save metrics
+        metrics_file = output_dir / "test_metrics.yaml"
+        with open(metrics_file, "w") as f:
+            # Convert numpy types to Python types
+            metrics_to_save = {k: v for k, v in metrics.items() if k != "confusion_matrix"}
+            yaml.dump(metrics_to_save, f, default_flow_style=False)
+        mlflow.log_artifact(str(metrics_file))
+        logger.info(f"\n✓ Metrics saved to {metrics_file}")
+        # Classification report
+        report = classification_report(
+            y_true,
+            y_pred,
+            target_names=["Healthy", "Dysarthric"],
+            digits=4,
+        )
+        report_file = output_dir / "classification_report.txt"
+        with open(report_file, "w") as f:
+            f.write(report)
+        mlflow.log_artifact(str(report_file))
+        logger.info(f"✓ Classification report saved to {report_file}")
+        # Confusion matrix
+        cm_path = output_dir / "confusion_matrix.png"
+        plot_confusion_matrix(metrics["confusion_matrix"], cm_path)
+        mlflow.log_artifact(str(cm_path))
+        logger.info(f"✓ Confusion matrix plot saved to {cm_path}")
+        # ROC curve
+        roc_path = output_dir / "roc_curve.png"
+        plot_roc_curve(y_true, y_prob, metrics["auc"], roc_path)
+        mlflow.log_artifact(str(roc_path))
+        logger.info(f"✓ ROC curve saved to {roc_path}")
+        # Precision-Recall curve
+        pr_path = output_dir / "precision_recall_curve.png"
+        plot_precision_recall_curve(y_true, y_prob, metrics["average_precision"], pr_path)
+        mlflow.log_artifact(str(pr_path))
+        logger.info(f"✓ Precision-Recall curve saved to {pr_path}")
+        # Probability distribution
+        prob_dist_path = output_dir / "probability_distribution.png"
+        plot_probability_distribution(y_true, y_prob, prob_dist_path)
+        mlflow.log_artifact(str(prob_dist_path))
+        logger.info(f"✓ Probability distribution saved to {prob_dist_path}")
+        # Error analysis
+        errors_file = output_dir / "misclassified_samples.csv"
+        errors_df = perform_error_analysis(y_true, y_pred, y_prob, file_paths, errors_file)
+        mlflow.log_artifact(str(errors_file))
+        logger.info(f"✓ Error analysis saved to {errors_file}")
+        logger.info(f"  Total errors: {len(errors_df)}")
+        logger.info(f"  False Positives: {len(errors_df[errors_df['error_type'] == 'False Positive'])}")
+        logger.info(f"  False Negatives: {len(errors_df[errors_df['error_type'] == 'False Negative'])}")
+        logger.info("\n✓ Evaluation complete!")
+if __name__ == "__main__":
+    main()

training/train_cnn_bilstm.py ADDED Viewed

	@@ -0,0 +1,379 @@

+"""
+Training script for CNN-BiLSTM-Transformer model (spectrogram branch).
+This model processes log-mel spectrograms and CWT scalograms through:
+1. CNN feature extraction (ResNet-style blocks)
+2. BiLSTM temporal modeling
+3. Transformer encoder with self-attention
+4. Classification head
+Usage:
+    python training/train_cnn_bilstm.py
+"""
+import logging
+import sys
+from pathlib import Path
+# Put the parent of this file's directory on sys.path so the ``training``
+# package imports below resolve when the file is run as a script.
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import mlflow
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import yaml
+from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+from training.dataset import DysarthriaDataset
+# training/augmentation.py defines the class as ``AudioAugmenter``; the alias
+# keeps the ``AudioAugmentor`` name used in this module working.
+from training.augmentation import AudioAugmenter as AudioAugmentor
+# ══════════════════════════════════════════════════════════════════════════════
+# CNN-BiLSTM-Transformer Model Architecture
+# ══════════════════════════════════════════════════════════════════════════════
+class ResidualBlock(nn.Module):
+    """Residual block for CNN feature extraction."""
+    def __init__(self, in_channels: int, out_channels: int):
+        super().__init__()
+        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
+        self.bn1 = nn.BatchNorm2d(out_channels)
+        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
+        self.bn2 = nn.BatchNorm2d(out_channels)
+        # Skip connection with 1x1 conv if dimensions change
+        self.skip = nn.Identity()
+        if in_channels != out_channels:
+            self.skip = nn.Conv2d(in_channels, out_channels, kernel_size=1)
+    def forward(self, x):
+        residual = self.skip(x)
+        x = torch.relu(self.bn1(self.conv1(x)))
+        x = self.bn2(self.conv2(x))
+        return torch.relu(x + residual)
+class CNNBiLSTMTransformer(nn.Module):
+    """
+    Spectrogram-based dysarthria detection model.
+    Architecture:
+    - CNN: Extract spatial features from spectrogram
+    - BiLSTM: Model temporal dependencies
+    - Transformer: Self-attention for long-range patterns
+    - Classifier: Binary classification head
+    """
+    def __init__(
+        self,
+        input_channels: int = 2,  # Log-mel + CWT
+        cnn_channels: list = [64, 128, 256],
+        lstm_hidden: int = 256,
+        transformer_heads: int = 8,
+        transformer_layers: int = 4,
+        dropout: float = 0.3,
+    ):
+        super().__init__()
+        # ─────────────────────────────────────────────────────────────────────
+        # CNN Feature Extractor
+        # ─────────────────────────────────────────────────────────────────────
+        self.cnn_blocks = nn.ModuleList()
+        in_ch = input_channels
+        for out_ch in cnn_channels:
+            self.cnn_blocks.append(ResidualBlock(in_ch, out_ch))
+            in_ch = out_ch
+        self.pool = nn.AdaptiveAvgPool2d((None, 1))  # Pool frequency dimension
+        # ─────────────────────────────────────────────────────────────────────
+        # BiLSTM Temporal Modeling
+        # ─────────────────────────────────────────────────────────────────────
+        self.lstm = nn.LSTM(
+            input_size=cnn_channels[-1],
+            hidden_size=lstm_hidden,
+            num_layers=2,
+            batch_first=True,
+            bidirectional=True,
+            dropout=dropout,
+        )
+        # ─────────────────────────────────────────────────────────────────────
+        # Transformer Encoder
+        # ─────────────────────────────────────────────────────────────────────
+        encoder_layer = nn.TransformerEncoderLayer(
+            d_model=lstm_hidden * 2,  # Bidirectional
+            nhead=transformer_heads,
+            dim_feedforward=lstm_hidden * 4,
+            dropout=dropout,
+            batch_first=True,
+        )
+        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=transformer_layers)
+        # ─────────────────────────────────────────────────────────────────────
+        # Classification Head
+        # ─────────────────────────────────────────────────────────────────────
+        self.classifier = nn.Sequential(
+            nn.Linear(lstm_hidden * 2, 512),
+            nn.ReLU(),
+            nn.Dropout(dropout),
+            nn.Linear(512, 256),
+            nn.ReLU(),
+            nn.Dropout(dropout),
+            nn.Linear(256, 2),  # Binary: healthy vs dysarthric
+        )
+    def forward(self, spectrogram):
+        """
+        Args:
+            spectrogram: (batch, 2, freq, time) - Log-mel + CWT
+        Returns:
+            logits: (batch, 2)
+            attention_weights: Transformer attention for explainability
+        """
+        batch_size = spectrogram.size(0)
+        # CNN feature extraction
+        x = spectrogram
+        for block in self.cnn_blocks:
+            x = block(x)
+        # Pool frequency dimension: (batch, channels, freq, time) → (batch, channels, time)
+        x = self.pool(x).squeeze(2)
+        # Transpose for LSTM: (batch, time, channels)
+        x = x.transpose(1, 2)
+        # BiLSTM
+        x, _ = self.lstm(x)
+        # Transformer encoder
+        x = self.transformer(x)
+        # Global average pooling over time
+        x = x.mean(dim=1)  # (batch, lstm_hidden*2)
+        # Classification
+        logits = self.classifier(x)
+        return logits
+# ══════════════════════════════════════════════════════════════════════════════
+# Training Loop
+# ══════════════════════════════════════════════════════════════════════════════
+def train_epoch(model, dataloader, optimizer, criterion, device):
+    """Train for one epoch."""
+    model.train()
+    total_loss = 0
+    all_preds = []
+    all_labels = []
+    for batch in tqdm(dataloader, desc="Training"):
+        spectrogram = batch["spectrogram"].to(device)
+        labels = batch["label"].to(device)
+        # Forward pass
+        optimizer.zero_grad()
+        logits = model(spectrogram)
+        loss = criterion(logits, labels)
+        # Backward pass
+        loss.backward()
+        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+        optimizer.step()
+        # Metrics
+        total_loss += loss.item()
+        preds = torch.argmax(logits, dim=1).cpu().numpy()
+        all_preds.extend(preds)
+        all_labels.extend(labels.cpu().numpy())
+    avg_loss = total_loss / len(dataloader)
+    accuracy = accuracy_score(all_labels, all_preds)
+    f1 = f1_score(all_labels, all_preds, average="binary")
+    return avg_loss, accuracy, f1
+def validate(model, dataloader, criterion, device):
+    """Validate the model."""
+    model.eval()
+    total_loss = 0
+    all_preds = []
+    all_probs = []
+    all_labels = []
+    with torch.no_grad():
+        for batch in tqdm(dataloader, desc="Validating"):
+            spectrogram = batch["spectrogram"].to(device)
+            labels = batch["label"].to(device)
+            logits = model(spectrogram)
+            loss = criterion(logits, labels)
+            total_loss += loss.item()
+            probs = torch.softmax(logits, dim=1)[:, 1].cpu().numpy()
+            preds = torch.argmax(logits, dim=1).cpu().numpy()
+            all_preds.extend(preds)
+            all_probs.extend(probs)
+            all_labels.extend(labels.cpu().numpy())
+    avg_loss = total_loss / len(dataloader)
+    accuracy = accuracy_score(all_labels, all_preds)
+    f1 = f1_score(all_labels, all_preds, average="binary")
+    auc = roc_auc_score(all_labels, all_probs)
+    return avg_loss, accuracy, f1, auc
+def main():
+    # ──────────────────────────────────────────────────────────────────────────
+    # Setup
+    # ──────────────────────────────────────────────────────────────────────────
+    logging.basicConfig(level=logging.INFO)
+    logger = logging.getLogger(__name__)
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    logger.info(f"Using device: {device}")
+    # Load config
+    config_path = Path("configs/model_config.yaml")
+    with open(config_path) as f:
+        config = yaml.safe_load(f)
+    # MLflow setup
+    mlflow.set_experiment("cnn_bilstm_transformer_training")
+    # ──────────────────────────────────────────────────────────────────────────
+    # Data Loading
+    # ──────────────────────────────────────────────────────────────────────────
+    train_manifest = Path("data/manifests/train.csv")
+    val_manifest = Path("data/manifests/val.csv")
+    augmentor = AudioAugmentor(
+        time_stretch_range=(0.9, 1.1),
+        pitch_shift_range=(-2, 2),
+        noise_level=0.005,
+    )
+    train_dataset = DysarthriaDataset(train_manifest, augmentor=augmentor, mode="train")
+    val_dataset = DysarthriaDataset(val_manifest, augmentor=None, mode="val")
+    train_loader = DataLoader(
+        train_dataset,
+        batch_size=config.get("cnn_bilstm", {}).get("batch_size", 16),
+        shuffle=True,
+        num_workers=4,
+        pin_memory=True,
+    )
+    val_loader = DataLoader(
+        val_dataset,
+        batch_size=config.get("cnn_bilstm", {}).get("batch_size", 16),
+        shuffle=False,
+        num_workers=4,
+        pin_memory=True,
+    )
+    logger.info(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}")
+    # ──────────────────────────────────────────────────────────────────────────
+    # Model Setup
+    # ──────────────────────────────────────────────────────────────────────────
+    model = CNNBiLSTMTransformer(
+        input_channels=2,
+        cnn_channels=[64, 128, 256],
+        lstm_hidden=256,
+        transformer_heads=8,
+        transformer_layers=4,
+        dropout=0.3,
+    ).to(device)
+    logger.info(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
+    # Optimizer and scheduler
+    optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.01)
+    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
+        optimizer, mode="min", factor=0.5, patience=3, verbose=True
+    )
+    # Loss function with class weights (handle imbalance)
+    criterion = nn.CrossEntropyLoss()
+    # ──────────────────────────────────────────────────────────────────────────
+    # Training Loop
+    # ──────────────────────────────────────────────────────────────────────────
+    num_epochs = config.get("cnn_bilstm", {}).get("epochs", 30)
+    best_val_auc = 0
+    best_model_path = Path("models/cnn_bilstm_best.pt")
+    best_model_path.parent.mkdir(parents=True, exist_ok=True)
+    with mlflow.start_run():
+        # Log hyperparameters
+        mlflow.log_params({
+            "model": "cnn_bilstm_transformer",
+            "epochs": num_epochs,
+            "batch_size": config.get("cnn_bilstm", {}).get("batch_size", 16),
+            "learning_rate": 1e-4,
+            "optimizer": "AdamW",
+        })
+        for epoch in range(1, num_epochs + 1):
+            logger.info(f"\nEpoch {epoch}/{num_epochs}")
+            # Train
+            train_loss, train_acc, train_f1 = train_epoch(
+                model, train_loader, optimizer, criterion, device
+            )
+            # Validate
+            val_loss, val_acc, val_f1, val_auc = validate(
+                model, val_loader, criterion, device
+            )
+            # Learning rate scheduling
+            scheduler.step(val_loss)
+            # Logging
+            logger.info(
+                f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}, F1: {train_f1:.4f}"
+            )
+            logger.info(
+                f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}, F1: {val_f1:.4f}, AUC: {val_auc:.4f}"
+            )
+            mlflow.log_metrics({
+                "train_loss": train_loss,
+                "train_accuracy": train_acc,
+                "train_f1": train_f1,
+                "val_loss": val_loss,
+                "val_accuracy": val_acc,
+                "val_f1": val_f1,
+                "val_auc": val_auc,
+                "learning_rate": optimizer.param_groups[0]["lr"],
+            }, step=epoch)
+            # Save best model
+            if val_auc > best_val_auc:
+                best_val_auc = val_auc
+                torch.save({
+                    "epoch": epoch,
+                    "model_state_dict": model.state_dict(),
+                    "optimizer_state_dict": optimizer.state_dict(),
+                    "val_auc": val_auc,
+                }, best_model_path)
+                logger.info(f"✓ New best model saved (AUC: {val_auc:.4f})")
+                mlflow.log_artifact(str(best_model_path))
+        logger.info(f"\n✓ Training complete! Best validation AUC: {best_val_auc:.4f}")
+        mlflow.log_metric("best_val_auc", best_val_auc)
+if __name__ == "__main__":
+    main()

training/train_ensemble_weights.py ADDED Viewed

	@@ -0,0 +1,376 @@

+"""
+Optimize ensemble weights between HuBERT-SALR and CNN-BiLSTM models.
+This script performs grid search to find the optimal alpha (mixing weight):
+    ensemble_logits = alpha * hubert_logits + (1 - alpha) * cnn_logits
+Usage:
+    python training/train_ensemble_weights.py
+"""
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+import mlflow
+import numpy as np
+from tqdm import tqdm
+import yaml
+import pandas as pd
+from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
+import logging
+import matplotlib.pyplot as plt
+import seaborn as sns
+from training.dataset import DysarthriaDataset
+# ══════════════════════════════════════════════════════════════════════════════
+# Model Loading Utilities
+# ══════════════════════════════════════════════════════════════════════════════
+def load_hubert_salr(checkpoint_path: Path, device):
+    """Load trained HuBERT-SALR model."""
+    from training.train_hubert_salr import HuBERTSALRModel
+    model = HuBERTSALRModel()
+    checkpoint = torch.load(checkpoint_path, map_location=device)
+    model.load_state_dict(checkpoint["model_state_dict"])
+    model.to(device)
+    model.eval()
+    return model
+def load_cnn_bilstm(checkpoint_path: Path, device):
+    """Load trained CNN-BiLSTM model."""
+    from training.train_cnn_bilstm import CNNBiLSTMTransformer
+    model = CNNBiLSTMTransformer()
+    checkpoint = torch.load(checkpoint_path, map_location=device)
+    model.load_state_dict(checkpoint["model_state_dict"])
+    model.to(device)
+    model.eval()
+    return model
+# ══════════════════════════════════════════════════════════════════════════════
+# Ensemble Evaluation
+# ══════════════════════════════════════════════════════════════════════════════
+def evaluate_ensemble(
+    hubert_model,
+    cnn_model,
+    dataloader,
+    alpha: float,
+    device,
+):
+    """
+    Evaluate ensemble with given alpha weight.
+    Args:
+        hubert_model: HuBERT-SALR model
+        cnn_model: CNN-BiLSTM model
+        dataloader: Validation data
+        alpha: Mixing weight (0 to 1)
+        device: torch device
+    Returns:
+        Dict of metrics
+    """
+    all_preds = []
+    all_probs = []
+    all_labels = []
+    with torch.no_grad():
+        for batch in tqdm(dataloader, desc=f"Alpha={alpha:.2f}", leave=False):
+            waveform = batch["waveform"].to(device)
+            spectrogram = batch["spectrogram"].to(device)
+            labels = batch["label"].to(device)
+            # Get predictions from both models
+            hubert_logits = hubert_model(waveform)
+            cnn_logits = cnn_model(spectrogram)
+            # Ensemble
+            ensemble_logits = alpha * hubert_logits + (1 - alpha) * cnn_logits
+            # Convert to predictions
+            probs = torch.softmax(ensemble_logits, dim=1)[:, 1].cpu().numpy()
+            preds = torch.argmax(ensemble_logits, dim=1).cpu().numpy()
+            all_preds.extend(preds)
+            all_probs.extend(probs)
+            all_labels.extend(labels.cpu().numpy())
+    # Compute metrics
+    accuracy = accuracy_score(all_labels, all_preds)
+    f1 = f1_score(all_labels, all_preds, average="binary")
+    auc = roc_auc_score(all_labels, all_probs)
+    cm = confusion_matrix(all_labels, all_preds)
+    # Compute sensitivity and specificity
+    tn, fp, fn, tp = cm.ravel()
+    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
+    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
+    return {
+        "alpha": alpha,
+        "accuracy": accuracy,
+        "f1": f1,
+        "auc": auc,
+        "sensitivity": sensitivity,
+        "specificity": specificity,
+        "confusion_matrix": cm,
+    }
+# ══════════════════════════════════════════════════════════════════════════════
+# Grid Search
+# ══════════════════════════════════════════════════════════════════════════════
+def grid_search_alpha(
+    hubert_model,
+    cnn_model,
+    dataloader,
+    device,
+    alpha_range=(0.0, 1.0),
+    num_points=21,
+):
+    """
+    Perform grid search over alpha values.
+    Args:
+        hubert_model: HuBERT-SALR model
+        cnn_model: CNN-BiLSTM model
+        dataloader: Validation data
+        device: torch device
+        alpha_range: (min, max) alpha values
+        num_points: Number of alpha values to test
+    Returns:
+        DataFrame with results for each alpha
+    """
+    alphas = np.linspace(alpha_range[0], alpha_range[1], num_points)
+    results = []
+    for alpha in alphas:
+        metrics = evaluate_ensemble(hubert_model, cnn_model, dataloader, alpha, device)
+        results.append(metrics)
+    return pd.DataFrame(results)
+# ══════════════════════════════════════════════════════════════════════════════
+# Visualization
+# ══════════════════════════════════════════════════════════════════════════════
+def plot_alpha_search(results_df, output_path: Path):
+    """Plot metrics vs alpha."""
+    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+    metrics = ["accuracy", "f1", "auc", "sensitivity"]
+    titles = ["Accuracy", "F1 Score", "AUC-ROC", "Sensitivity"]
+    for ax, metric, title in zip(axes.flat, metrics, titles):
+        ax.plot(results_df["alpha"], results_df[metric], marker="o", linewidth=2)
+        ax.set_xlabel("Alpha (HuBERT weight)", fontsize=12)
+        ax.set_ylabel(title, fontsize=12)
+        ax.set_title(f"{title} vs Alpha", fontsize=14)
+        ax.grid(True, alpha=0.3)
+        # Mark best alpha
+        best_idx = results_df[metric].idxmax()
+        best_alpha = results_df.loc[best_idx, "alpha"]
+        best_value = results_df.loc[best_idx, metric]
+        ax.axvline(best_alpha, color="red", linestyle="--", alpha=0.5)
+        ax.scatter([best_alpha], [best_value], color="red", s=100, zorder=5)
+        ax.text(
+            best_alpha,
+            best_value,
+            f"α={best_alpha:.2f}\n{best_value:.4f}",
+            ha="center",
+            va="bottom",
+            fontsize=10,
+        )
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=300, bbox_inches="tight")
+    plt.close()
+def plot_confusion_matrix(cm, alpha, output_path: Path):
+    """Plot confusion matrix for best alpha."""
+    plt.figure(figsize=(8, 6))
+    sns.heatmap(
+        cm,
+        annot=True,
+        fmt="d",
+        cmap="Blues",
+        xticklabels=["Healthy", "Dysarthric"],
+        yticklabels=["Healthy", "Dysarthric"],
+    )
+    plt.title(f"Confusion Matrix (α={alpha:.2f})", fontsize=14)
+    plt.ylabel("True Label", fontsize=12)
+    plt.xlabel("Predicted Label", fontsize=12)
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=300, bbox_inches="tight")
+    plt.close()
+# ══════════════════════════════════════════════════════════════════════════════
+# Main
+# ══════════════════════════════════════════════════════════════════════════════
+def main():
+    logging.basicConfig(level=logging.INFO)
+    logger = logging.getLogger(__name__)
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    logger.info(f"Using device: {device}")
+    # ──────────────────────────────────────────────────────────────────────────
+    # Load Models
+    # ──────────────────────────────────────────────────────────────────────────
+    hubert_checkpoint = Path("models/hubert_salr_best.pt")
+    cnn_checkpoint = Path("models/cnn_bilstm_best.pt")
+    if not hubert_checkpoint.exists():
+        logger.error(f"HuBERT checkpoint not found: {hubert_checkpoint}")
+        logger.error("Please train HuBERT-SALR first: python training/train_hubert_salr.py")
+        return
+    if not cnn_checkpoint.exists():
+        logger.error(f"CNN-BiLSTM checkpoint not found: {cnn_checkpoint}")
+        logger.error("Please train CNN-BiLSTM first: python training/train_cnn_bilstm.py")
+        return
+    logger.info("Loading HuBERT-SALR model...")
+    hubert_model = load_hubert_salr(hubert_checkpoint, device)
+    logger.info("Loading CNN-BiLSTM model...")
+    cnn_model = load_cnn_bilstm(cnn_checkpoint, device)
+    # ─────────────────────��────────────────────────────────────────────────────
+    # Load Validation Data
+    # ──────────────────────────────────────────────────────────────────────────
+    val_manifest = Path("data/manifests/val.csv")
+    val_dataset = DysarthriaDataset(val_manifest, augmentor=None, mode="val")
+    val_loader = DataLoader(
+        val_dataset,
+        batch_size=16,
+        shuffle=False,
+        num_workers=4,
+        pin_memory=True,
+    )
+    logger.info(f"Validation samples: {len(val_dataset)}")
+    # ──────────────────────────────────────────────────────────────────────────
+    # Grid Search
+    # ──────────────────────────────────────────────────────────────────────────
+    mlflow.set_experiment("ensemble_weight_optimization")
+    with mlflow.start_run():
+        logger.info("\nStarting grid search over alpha values...")
+        results_df = grid_search_alpha(
+            hubert_model,
+            cnn_model,
+            val_loader,
+            device,
+            alpha_range=(0.0, 1.0),
+            num_points=21,
+        )
+        # Find best alpha for each metric
+        best_alpha_auc = results_df.loc[results_df["auc"].idxmax(), "alpha"]
+        best_alpha_f1 = results_df.loc[results_df["f1"].idxmax(), "alpha"]
+        best_alpha_acc = results_df.loc[results_df["accuracy"].idxmax(), "alpha"]
+        logger.info("\n" + "=" * 80)
+        logger.info("GRID SEARCH RESULTS")
+        logger.info("=" * 80)
+        logger.info(f"Best alpha (AUC):      {best_alpha_auc:.2f}")
+        logger.info(f"Best alpha (F1):       {best_alpha_f1:.2f}")
+        logger.info(f"Best alpha (Accuracy): {best_alpha_acc:.2f}")
+        logger.info("=" * 80)
+        # Use AUC as primary metric
+        best_alpha = best_alpha_auc
+        best_row = results_df.loc[results_df["alpha"] == best_alpha].iloc[0]
+        logger.info(f"\nOptimal alpha: {best_alpha:.2f}")
+        logger.info(f"  Accuracy:    {best_row['accuracy']:.4f}")
+        logger.info(f"  F1 Score:    {best_row['f1']:.4f}")
+        logger.info(f"  AUC:         {best_row['auc']:.4f}")
+        logger.info(f"  Sensitivity: {best_row['sensitivity']:.4f}")
+        logger.info(f"  Specificity: {best_row['specificity']:.4f}")
+        # Log to MLflow
+        mlflow.log_params({
+            "num_alpha_points": 21,
+            "alpha_range_min": 0.0,
+            "alpha_range_max": 1.0,
+        })
+        mlflow.log_metrics({
+            "best_alpha": best_alpha,
+            "best_accuracy": best_row["accuracy"],
+            "best_f1": best_row["f1"],
+            "best_auc": best_row["auc"],
+            "best_sensitivity": best_row["sensitivity"],
+            "best_specificity": best_row["specificity"],
+        })
+        # Save results
+        output_dir = Path("reports/ensemble_optimization")
+        output_dir.mkdir(parents=True, exist_ok=True)
+        results_csv = output_dir / "alpha_search_results.csv"
+        results_df.to_csv(results_csv, index=False)
+        mlflow.log_artifact(str(results_csv))
+        logger.info(f"\n✓ Results saved to {results_csv}")
+        # Plot metrics vs alpha
+        plot_path = output_dir / "alpha_search_plot.png"
+        plot_alpha_search(results_df, plot_path)
+        mlflow.log_artifact(str(plot_path))
+        logger.info(f"✓ Plots saved to {plot_path}")
+        # Plot confusion matrix for best alpha
+        cm_path = output_dir / "confusion_matrix_best_alpha.png"
+        plot_confusion_matrix(best_row["confusion_matrix"], best_alpha, cm_path)
+        mlflow.log_artifact(str(cm_path))
+        logger.info(f"✓ Confusion matrix saved to {cm_path}")
+        # Save optimal config
+        optimal_config = {
+            "ensemble": {
+                "alpha": float(best_alpha),
+                "hubert_weight": float(best_alpha),
+                "cnn_bilstm_weight": float(1 - best_alpha),
+                "validation_metrics": {
+                    "accuracy": float(best_row["accuracy"]),
+                    "f1": float(best_row["f1"]),
+                    "auc": float(best_row["auc"]),
+                    "sensitivity": float(best_row["sensitivity"]),
+                    "specificity": float(best_row["specificity"]),
+                },
+            }
+        }
+        config_path = output_dir / "optimal_ensemble_config.yaml"
+        with open(config_path, "w") as f:
+            yaml.dump(optimal_config, f, default_flow_style=False)
+        mlflow.log_artifact(str(config_path))
+        logger.info(f"✓ Optimal config saved to {config_path}")
+        logger.info("\n✓ Ensemble weight optimization complete!")
+        logger.info(f"  Update configs/model_config.yaml with alpha={best_alpha:.2f}")
+# Script entry point: call main() only when this file is executed directly.
+if __name__ == "__main__":
+    main()

training/train_hubert_fast.py ADDED Viewed

	@@ -0,0 +1,297 @@

+#!/usr/bin/env python3
+"""
+Fast fine-tuning script for HuBERT-SALR model.
+Optimizations:
+- Reduced dataset size (500-1000 samples)
+- Fewer epochs (5 instead of 20)
+- Simplified model architecture
+- Uses MPS/GPU acceleration
+- Faster feature extraction
+Usage:
+    python training/train_hubert_fast.py
+"""
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Subset
+import numpy as np
+from tqdm import tqdm
+import logging
+import pandas as pd
+from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
+from transformers import HubertModel
+# Configure the root logger at INFO level and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# ══════════════════════════════════════════════════════════════════════════════
+# Simplified HuBERT Model
+# ══════════════════════════════════════════════════════════════════════════════
+class SimplifiedHuBERTClassifier(nn.Module):
+    """Simplified HuBERT for faster training."""
+    def __init__(self, freeze_base=True):
+        super().__init__()
+        # Load pre-trained HuBERT (smaller version for speed)
+        logger.info("Loading HuBERT-base model...")
+        self.hubert = HubertModel.from_pretrained("facebook/hubert-base-ls960")
+        # Freeze base model for faster training
+        if freeze_base:
+            for param in self.hubert.parameters():
+                param.requires_grad = False
+            logger.info("✓ HuBERT base frozen (only training classifier)")
+        # Simple classifier head
+        hidden_size = self.hubert.config.hidden_size  # 768 for base
+        self.classifier = nn.Sequential(
+            nn.Linear(hidden_size, 256),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(256, 2),  # Binary: healthy vs dysarthric
+        )
+    def forward(self, input_values):
+        # Extract features
+        with torch.no_grad() if self.training else torch.enable_grad():
+            outputs = self.hubert(input_values)
+        # Pool: mean across time dimension
+        hidden_states = outputs.last_hidden_state  # (batch, time, hidden)
+        pooled = hidden_states.mean(dim=1)  # (batch, hidden)
+        # Classify
+        logits = self.classifier(pooled)
+        return logits
+# ══════════════════════════════════════════════════════════════════════════════
+# Fast Dataset (No Heavy Feature Extraction)
+# ══════════════════════════════════════════════════════════════════════════════
+class FastDysarthriaDataset(torch.utils.data.Dataset):
+    """Simplified dataset for fast training."""
+    def __init__(self, manifest_path, max_duration=10.0, sample_rate=16000):
+        self.manifest = pd.read_csv(manifest_path)
+        self.max_duration = max_duration
+        self.sample_rate = sample_rate
+        self.max_length = int(max_duration * sample_rate)
+        # Filter valid files
+        self.manifest = self.manifest[
+            (self.manifest['duration'] >= 5.0) &  # Min duration
+            (self.manifest['duration'] <= max_duration)  # Max duration
+        ].reset_index(drop=True)
+        logger.info(f"Dataset: {len(self.manifest)} samples (filtered for 5-10s duration)")
+    def __len__(self):
+        return len(self.manifest)
+    def __getitem__(self, idx):
+        row = self.manifest.iloc[idx]
+        # Load audio
+        import librosa
+        waveform, sr = librosa.load(row['file_path'], sr=self.sample_rate)
+        # Pad or truncate to fixed length
+        if len(waveform) > self.max_length:
+            waveform = waveform[:self.max_length]
+        else:
+            waveform = np.pad(waveform, (0, self.max_length - len(waveform)))
+        return {
+            'waveform': torch.FloatTensor(waveform),
+            'label': int(row['label']),
+        }
+# ══════════════════════════════════════════════════════════════════════════════
+# Training Functions
+# ══════════════════════════════════════════════════════════════════════════════
+def train_epoch(model, dataloader, optimizer, criterion, device):
+    """Train for one epoch."""
+    model.train()
+    total_loss = 0
+    all_preds = []
+    all_labels = []
+    for batch in tqdm(dataloader, desc="Training"):
+        waveform = batch["waveform"].to(device)
+        labels = batch["label"].to(device)
+        optimizer.zero_grad()
+        logits = model(waveform)
+        loss = criterion(logits, labels)
+        loss.backward()
+        optimizer.step()
+        total_loss += loss.item()
+        preds = torch.argmax(logits, dim=1).cpu().numpy()
+        all_preds.extend(preds)
+        all_labels.extend(labels.cpu().numpy())
+    avg_loss = total_loss / len(dataloader)
+    accuracy = accuracy_score(all_labels, all_preds)
+    f1 = f1_score(all_labels, all_preds, average="binary")
+    return avg_loss, accuracy, f1
+def validate(model, dataloader, criterion, device):
+    """Validate the model."""
+    model.eval()
+    total_loss = 0
+    all_preds = []
+    all_probs = []
+    all_labels = []
+    with torch.no_grad():
+        for batch in tqdm(dataloader, desc="Validating"):
+            waveform = batch["waveform"].to(device)
+            labels = batch["label"].to(device)
+            logits = model(waveform)
+            loss = criterion(logits, labels)
+            total_loss += loss.item()
+            probs = torch.softmax(logits, dim=1)[:, 1].cpu().numpy()
+            preds = torch.argmax(logits, dim=1).cpu().numpy()
+            all_preds.extend(preds)
+            all_probs.extend(probs)
+            all_labels.extend(labels.cpu().numpy())
+    avg_loss = total_loss / len(dataloader)
+    accuracy = accuracy_score(all_labels, all_preds)
+    f1 = f1_score(all_labels, all_preds, average="binary")
+    auc = roc_auc_score(all_labels, all_probs)
+    return avg_loss, accuracy, f1, auc
+# ══════════════════════════════════════════════════════════════════════════════
+# Main Training
+# ══════════════════════════════════════════════════════════════════════════════
+def main():
+    # Device selection
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+    elif torch.backends.mps.is_available():
+        device = torch.device("mps")
+    else:
+        device = torch.device("cpu")
+    logger.info(f"🚀 Using device: {device}")
+    # Load datasets
+    train_manifest = Path("data/manifests/train.csv")
+    val_manifest = Path("data/manifests/val.csv")
+    train_dataset = FastDysarthriaDataset(train_manifest, max_duration=10.0)
+    val_dataset = FastDysarthriaDataset(val_manifest, max_duration=10.0)
+    # Use subset for faster training
+    MAX_TRAIN_SAMPLES = 500  # Reduced from 3000
+    MAX_VAL_SAMPLES = 100    # Reduced from 647
+    if len(train_dataset) > MAX_TRAIN_SAMPLES:
+        indices = np.random.choice(len(train_dataset), MAX_TRAIN_SAMPLES, replace=False)
+        train_dataset = Subset(train_dataset, indices)
+        logger.info(f"✂️  Using subset: {MAX_TRAIN_SAMPLES} training samples")
+    if len(val_dataset) > MAX_VAL_SAMPLES:
+        indices = np.random.choice(len(val_dataset), MAX_VAL_SAMPLES, replace=False)
+        val_dataset = Subset(val_dataset, indices)
+        logger.info(f"✂️  Using subset: {MAX_VAL_SAMPLES} validation samples")
+    # Data loaders
+    train_loader = DataLoader(
+        train_dataset,
+        batch_size=4,  # Small batch for speed
+        shuffle=True,
+        num_workers=0,  # Avoid multiprocessing issues
+    )
+    val_loader = DataLoader(
+        val_dataset,
+        batch_size=4,
+        shuffle=False,
+        num_workers=0,
+    )
+    # Model
+    model = SimplifiedHuBERTClassifier(freeze_base=True).to(device)
+    logger.info(f"✓ Model loaded on {device}")
+    # Optimizer and loss
+    optimizer = optim.AdamW(model.classifier.parameters(), lr=1e-3)  # Higher LR for frozen base
+    criterion = nn.CrossEntropyLoss()
+    # Training loop
+    NUM_EPOCHS = 5  # Reduced from 20
+    best_val_auc = 0
+    best_model_path = Path("models/hubert_fast_best.pt")
+    best_model_path.parent.mkdir(parents=True, exist_ok=True)
+    logger.info(f"\n{'='*80}")
+    logger.info(f"  FAST TRAINING - {NUM_EPOCHS} epochs")
+    logger.info(f"{'='*80}\n")
+    for epoch in range(1, NUM_EPOCHS + 1):
+        logger.info(f"\nEpoch {epoch}/{NUM_EPOCHS}")
+        logger.info("-" * 40)
+        # Train
+        train_loss, train_acc, train_f1 = train_epoch(
+            model, train_loader, optimizer, criterion, device
+        )
+        # Validate
+        val_loss, val_acc, val_f1, val_auc = validate(
+            model, val_loader, criterion, device
+        )
+        # Log
+        logger.info(f"Train: Loss={train_loss:.4f}, Acc={train_acc:.4f}, F1={train_f1:.4f}")
+        logger.info(f"Val:   Loss={val_loss:.4f}, Acc={val_acc:.4f}, F1={val_f1:.4f}, AUC={val_auc:.4f}")
+        # Save best model
+        if val_auc > best_val_auc:
+            best_val_auc = val_auc
+            torch.save({
+                'epoch': epoch,
+                'model_state_dict': model.state_dict(),
+                'optimizer_state_dict': optimizer.state_dict(),
+                'val_auc': val_auc,
+            }, best_model_path)
+            logger.info(f"✓ New best model saved (AUC: {val_auc:.4f})")
+    logger.info(f"\n{'='*80}")
+    logger.info(f"  ✓ TRAINING COMPLETE!")
+    logger.info(f"{'='*80}")
+    logger.info(f"Best validation AUC: {best_val_auc:.4f}")
+    logger.info(f"Model saved to: {best_model_path}")
+    logger.info(f"\nNext steps:")
+    logger.info(f"  1. Test the model on test set")
+    logger.info(f"  2. Update model_registry.py to use this checkpoint")
+    logger.info(f"  3. Run inference on new audio files")
+# Script entry point: call main() only when this file is executed directly.
+if __name__ == "__main__":
+    main()

training/train_hubert_salr.py ADDED Viewed

	@@ -0,0 +1,225 @@

+#!/usr/bin/env python3
+"""Train HuBERT-SALR model for dysarthria detection."""
+import logging
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+from pathlib import Path
+import mlflow
+import yaml
+from training.dataset import DysarthriaDataset, collate_fn
+# Configure the root logger at INFO level and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class HuBERTSALRModel(nn.Module):
+    """HuBERT with SALR head for dysarthria detection."""
+    def __init__(self, hubert_checkpoint="facebook/hubert-large-ll60k"):
+        super().__init__()
+        from transformers import HubertModel
+        # Load pretrained HuBERT
+        self.hubert = HubertModel.from_pretrained(hubert_checkpoint)
+        # Freeze feature extractor (optional)
+        for param in self.hubert.feature_extractor.parameters():
+            param.requires_grad = False
+        # Layer-weighted pooling (learnable weights for 24 layers)
+        self.layer_weights = nn.Parameter(torch.ones(24) / 24)
+        # SALR head
+        self.classifier = nn.Sequential(
+            nn.Linear(1024, 256),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(256, 2),  # Binary classification
+        )
+        self.embedder = nn.Sequential(
+            nn.Linear(1024, 256),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(256, 128),  # Embedding for triplet loss
+        )
+    def forward(self, waveform):
+        """Forward pass."""
+        # HuBERT encoding
+        outputs = self.hubert(waveform, output_hidden_states=True)
+        hidden_states = outputs.hidden_states  # (batch, seq_len, hidden_size) × 24 layers
+        # Layer-weighted pooling
+        weighted_hidden = torch.stack(
+            [self.layer_weights[i] * hidden_states[i] for i in range(24)],
+            dim=0
+        ).sum(dim=0)  # (batch, seq_len, 1024)
+        # Global average pooling
+        pooled = weighted_hidden.mean(dim=1)  # (batch, 1024)
+        # Classification logits
+        logits = self.classifier(pooled)
+        # Embeddings for triplet loss
+        embeddings = self.embedder(pooled)
+        embeddings = nn.functional.normalize(embeddings, p=2, dim=1)
+        return logits, embeddings
+def train_hubert_salr(
+    train_manifest="data/manifests/train_manifest.csv",
+    val_manifest="data/manifests/val_manifest.csv",
+    batch_size=8,
+    num_epochs=50,
+    learning_rate=1e-4,
+    device="cuda",
+):
+    """
+    Train HuBERT-SALR model.
+    Args:
+        train_manifest: Path to training manifest
+        val_manifest: Path to validation manifest
+        batch_size: Batch size
+        num_epochs: Number of epochs
+        learning_rate: Learning rate
+        device: Device (cuda/cpu)
+    """
+    # Set device
+    device = torch.device(device if torch.cuda.is_available() else "cpu")
+    logger.info(f"Using device: {device}")
+    # Initialize MLflow
+    mlflow.set_experiment("dysarthria_hubert_salr")
+    with mlflow.start_run():
+        # Log parameters
+        mlflow.log_params({
+            "model": "HuBERT-SALR",
+            "batch_size": batch_size,
+            "num_epochs": num_epochs,
+            "learning_rate": learning_rate,
+        })
+        # Create datasets
+        train_dataset = DysarthriaDataset(train_manifest, augment=True)
+        val_dataset = DysarthriaDataset(val_manifest, augment=False)
+        train_loader = DataLoader(
+            train_dataset,
+            batch_size=batch_size,
+            shuffle=True,
+            num_workers=0,  # Disabled for compatibility
+            collate_fn=collate_fn,
+        )
+        val_loader = DataLoader(
+            val_dataset,
+            batch_size=batch_size,
+            shuffle=False,
+            num_workers=0,  # Disabled for compatibility
+            collate_fn=collate_fn,
+        )
+        # Initialize model
+        model = HuBERTSALRModel().to(device)
+        # Optimizer
+        optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
+        # Losses
+        ce_loss_fn = nn.CrossEntropyLoss()
+        triplet_loss_fn = nn.TripletMarginLoss(margin=1.0)
+        # Training loop
+        best_val_loss = float("inf")
+        for epoch in range(num_epochs):
+            # Training
+            model.train()
+            train_loss = 0.0
+            for batch in train_loader:
+                waveform = batch["waveform"].to(device)
+                labels = batch["label"].squeeze(1).to(device)
+                optimizer.zero_grad()
+                # Forward pass
+                logits, embeddings = model(waveform)
+                # Classification loss
+                ce_loss = ce_loss_fn(logits, labels)
+                # Triplet loss (simplified: use random triplets)
+                # In full implementation, use hard negative mining
+                triplet_loss = torch.tensor(0.0).to(device)  # Placeholder
+                # Combined loss
+                loss = ce_loss + 0.5 * triplet_loss
+                # Backward pass
+                loss.backward()
+                optimizer.step()
+                train_loss += loss.item()
+            train_loss /= len(train_loader)
+            # Validation
+            model.eval()
+            val_loss = 0.0
+            correct = 0
+            total = 0
+            with torch.no_grad():
+                for batch in val_loader:
+                    waveform = batch["waveform"].to(device)
+                    labels = batch["label"].squeeze(1).to(device)
+                    logits, _ = model(waveform)
+                    loss = ce_loss_fn(logits, labels)
+                    val_loss += loss.item()
+                    preds = logits.argmax(dim=1)
+                    correct += (preds == labels).sum().item()
+                    total += labels.size(0)
+            val_loss /= len(val_loader)
+            val_acc = correct / total
+            # Log metrics
+            mlflow.log_metrics({
+                "train_loss": train_loss,
+                "val_loss": val_loss,
+                "val_accuracy": val_acc,
+            }, step=epoch)
+            logger.info(
+                f"Epoch {epoch+1}/{num_epochs}: "
+                f"train_loss={train_loss:.4f}, "
+                f"val_loss={val_loss:.4f}, "
+                f"val_acc={val_acc:.4f}"
+            )
+            # Save best model
+            if val_loss < best_val_loss:
+                best_val_loss = val_loss
+                checkpoint_path = Path("models/checkpoints/hubert_salr_best.pt")
+                checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
+                torch.save(model.state_dict(), checkpoint_path)
+                mlflow.log_artifact(str(checkpoint_path))
+        logger.info("Training complete!")
+# Script entry point: start training with the default arguments when run directly.
+if __name__ == "__main__":
+    train_hubert_salr()