mgbam committed on
Commit
5ec9e9d
·
verified ·
1 Parent(s): cbecd45

Upload 5 files

Browse files
app/ml/ast_adapter.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Tuple, Type
2
+
3
+ import torch
4
+
5
+
6
+ def _apply_torch_amp_shims() -> None:
7
+ """
8
+ AST expects torch.amp.GradScaler/autocast (torch 2.3+); shim from torch.cuda.amp for 2.2.
9
+ """
10
+ if not hasattr(torch.amp, "GradScaler") and hasattr(torch.cuda, "amp"):
11
+ torch.amp.GradScaler = torch.cuda.amp.GradScaler # type: ignore[attr-defined]
12
+ if not hasattr(torch.amp, "autocast") and hasattr(torch.cuda, "amp"):
13
+ torch.amp.autocast = torch.cuda.amp.autocast # type: ignore[attr-defined]
14
+
15
+
16
def load_ast_trainer() -> Tuple[Optional[Type[object]], Optional[Type[object]], Optional[Exception]]:
    """
    Attempt to import the optional adaptive-sparse-training package.

    Returns:
        ``(trainer_cls, config_cls, error)`` — the two classes plus ``None``
        on success, or ``(None, None, exception)`` on any failure.
    """
    try:
        _apply_torch_amp_shims()
        from adaptive_sparse_training import AdaptiveSparseTrainer  # type: ignore
        from adaptive_sparse_training.config import ASTConfig  # type: ignore
    except Exception as exc:  # pragma: no cover - optional dependency
        return None, None, exc
    return AdaptiveSparseTrainer, ASTConfig, None
app/ml/gating.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Tuple
2
+
3
+ from sundew.gating import gate_probability_with_hysteresis, significance_score
4
+
5
+
6
+ def _clamp(value: float, low: float = 0.0, high: float = 1.0) -> float:
7
+ return max(low, min(high, value))
8
+
9
+
10
+ def _window_features(window: List[float]) -> Dict[str, float]:
11
+ """
12
+ Compute basic features for a window to feed Sundew's significance score.
13
+ """
14
+ if not window:
15
+ return {"magnitude": 0.0, "anomaly_score": 0.0, "context_relevance": 0.0, "urgency": 0.0}
16
+
17
+ length = float(len(window))
18
+ mean = sum(window) / length
19
+ mean_abs = sum(abs(x) for x in window) / length
20
+ max_abs = max(abs(x) for x in window)
21
+ variance = sum((x - mean) ** 2 for x in window) / length
22
+
23
+ magnitude = _clamp(max_abs * 10.0, 0.0, 10.0) * 10.0 # 0..100 scale
24
+ anomaly_score = _clamp(variance / (variance + 1.0))
25
+ context_relevance = _clamp(mean_abs)
26
+ urgency = _clamp(mean_abs * 0.5)
27
+
28
+ return {
29
+ "magnitude": magnitude,
30
+ "anomaly_score": anomaly_score,
31
+ "context_relevance": context_relevance,
32
+ "urgency": urgency,
33
+ }
34
+
35
+
36
def gate_signal(
    signal: List[float],
    window_size: int = 128,
    step: int = 64,
    threshold: float = 0.55,
    temperature: float = 0.15,
    return_windows: bool = False,
    max_windows: int = 200,
) -> Tuple[List[float], Dict[str, Any]]:
    """
    Run Sundew gating over sliding windows of *signal* to trim workload.

    Args:
        signal: raw signal values.
        window_size: length of each sliding window.
        step: stride between consecutive windows.
        threshold: gate threshold fed to the hysteresis gate.
        temperature: gate temperature (0 = hard gate).
        return_windows: attach per-window metadata to the result.
        max_windows: cap on windows included in that metadata (preview use).

    Returns:
        ``(gated_signal, meta)`` where gated_signal is the concatenation of
        selected windows (the original signal when nothing was selected),
        and meta reports counts, ratio, threshold/temperature and,
        optionally, per-window details.
    """
    # Signals shorter than one window pass through untouched.
    if len(signal) < window_size:
        return signal, {
            "total_windows": 0,
            "selected_windows": 0,
            "ratio": 1.0,
            "threshold": threshold,
            "temperature": temperature,
        }

    gated: List[float] = []
    per_window: List[Dict[str, Any]] = []
    total = 0
    kept = 0
    previously_active = False

    for offset in range(0, len(signal) - window_size + 1, step):
        chunk = signal[offset : offset + window_size]
        total += 1

        score = significance_score(
            _window_features(chunk), w_mag=0.35, w_ano=0.4, w_ctx=0.15, w_urg=0.1
        )
        probability = gate_probability_with_hysteresis(
            score,
            threshold=threshold,
            temperature=temperature,
            last_activation=previously_active,
        )
        keep = probability >= 0.5

        if keep:
            gated.extend(chunk)
            kept += 1
        # Hysteresis: next window sees whether this one fired.
        previously_active = keep

        if return_windows and len(per_window) < max_windows:
            per_window.append(
                {
                    "start": offset,
                    "end": offset + window_size,
                    "significance": score,
                    "probability": probability,
                    "selected": keep,
                }
            )

    if not gated:
        gated = signal  # fall back to full signal

    meta: Dict[str, Any] = {
        "total_windows": total,
        "selected_windows": kept,
        "ratio": len(gated) / max(len(signal), 1),
        "threshold": threshold,
        "temperature": temperature,
    }
    if return_windows:
        meta["windows"] = per_window
    return gated, meta
app/ml/inference.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ import torch
5
+ from torch.nn import functional as F
6
+
7
+ from app.core.config import settings
8
+ from app.ml.model import ECGClassifier
9
+ from app.ml.gating import gate_signal
10
+
11
# Process-wide model cache (populated lazily by load_model) and the device
# inference runs on; CUDA is preferred when available.
_model: ECGClassifier | None = None
_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
+
14
+
15
def load_model() -> ECGClassifier:
    """
    Lazily construct and cache the ECG classifier on the module device.

    If ``settings.MODEL_WEIGHTS_PATH`` points at an existing file, the
    state dict is loaded from it; otherwise the model keeps its random
    initialization (useful for smoke tests).

    Returns:
        The cached, eval-mode ``ECGClassifier`` instance.
    """
    global _model
    if _model is None:
        model = ECGClassifier(num_classes=2)
        weights_path: Optional[str] = settings.MODEL_WEIGHTS_PATH
        if weights_path and os.path.exists(weights_path):
            # weights_only=True restricts unpickling to tensors/containers,
            # preventing arbitrary code execution from a tampered checkpoint.
            state = torch.load(weights_path, map_location=_device, weights_only=True)
            model.load_state_dict(state)
        model.to(_device)
        model.eval()
        _model = model
    return _model
31
+
32
+
33
@torch.no_grad()
def infer_ecg(
    signal: List[float],
    original_len: Optional[int] = None,
    gating_meta: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Run model inference on a single ECG signal.

    Args:
        signal: raw (possibly gated) signal values; must be non-empty.
        original_len: pre-gating length, used to report ``gated_ratio``.
        gating_meta: optional gating metadata passed through in the result.

    Returns:
        Dict with ``label``, ``score``, ``hr``, ``gated_ratio`` and, when
        gating_meta is provided, a nested ``gating`` entry. (Annotated as
        ``Dict[str, Any]`` because the gating entry is itself a dict.)

    Raises:
        ValueError: if *signal* is empty.
    """
    # Validate input before paying for model construction/loading.
    if not signal:
        raise ValueError("Signal cannot be empty.")
    model = load_model()

    # (batch=1, channels=1, length) layout expected by the Conv1d stack.
    tensor = torch.tensor(signal, dtype=torch.float32, device=_device).unsqueeze(0).unsqueeze(0)
    logits = model(tensor)
    probs = F.softmax(logits, dim=1)
    # Probability of the positive (arrhythmia) class.
    score = float(probs[0, 1].item())

    label = "arrhythmia" if score >= 0.5 else "normal"

    # Dummy heart rate estimation as placeholder
    hr_estimate = int(60 + 80 * score)

    original_len = original_len or len(signal)
    gating_ratio = len(signal) / max(original_len, 1)

    result: Dict[str, Any] = {
        "label": label,
        "score": score,
        "hr": hr_estimate,
        "gated_ratio": gating_ratio,
    }
    if gating_meta:
        result["gating"] = gating_meta
    return result
app/ml/model.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+
4
+
5
class ECGClassifier(nn.Module):
    """
    Minimal 1D-CNN ECG classifier.

    Two Conv1d/BatchNorm/ReLU stages (with one max-pool in between)
    feed global average pooling and a single linear layer that emits
    class logits.
    """

    def __init__(self, num_classes: int = 2):
        super().__init__()
        # NOTE: layer order/shapes must stay stable so saved state dicts
        # keep loading.
        self.features = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=5, padding=2),
            nn.BatchNorm1d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=2),
            nn.Conv1d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool1d(1),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a (batch, 1, length) signal to (batch, num_classes) logits."""
        return self.classifier(self.features(x))
app/ml/train_ecg.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Sequence, Tuple
3
+
4
+ import torch
5
+ from torch import nn, optim
6
+ from torch.utils.data import DataLoader, Dataset
7
+ from sqlalchemy import create_engine, select
8
+ from sqlalchemy.orm import Session, sessionmaker
9
+
10
+ from app.core.config import settings
11
+ from app.ml.model import ECGClassifier
12
+ from app.models.ecg import Base, ECGSample
13
+ from app.ml.ast_adapter import load_ast_trainer
14
+
15
# Training device, label vocabulary and the optional AST integration,
# all resolved once at import time. AST_* are (None, None, error) when the
# adaptive-sparse-training package is unavailable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LABEL_TO_IDX = {"normal": 0, "arrhythmia": 1}
AST_TRAINER, AST_CONFIG, AST_ERROR = load_ast_trainer()
18
+
19
+
20
class ECGDataset(Dataset):
    """
    In-memory dataset materialized from ECGSample rows.

    Each non-empty signal is padded or truncated to ``max_len``, reshaped
    to ``(1, max_len)`` and paired with its integer label; rows with empty
    signals are dropped.
    """

    def __init__(self, samples: Sequence[ECGSample], max_len: int):
        self.samples = samples
        self.max_len = max_len
        self.items: List[Tuple[torch.Tensor, int]] = []
        for sample in samples:
            values = sample.signal or []
            if not values:
                continue
            tensor = torch.tensor(values, dtype=torch.float32)
            deficit = self.max_len - tensor.numel()
            if deficit > 0:
                tensor = torch.nn.functional.pad(tensor, (0, deficit))
            elif deficit < 0:
                tensor = tensor[: self.max_len]
            # (channels=1, length) layout for Conv1d; unknown labels map to 0.
            label_idx = LABEL_TO_IDX.get(sample.label or "normal", 0)
            self.items.append((tensor.unsqueeze(0), label_idx))

    def __len__(self) -> int:
        return len(self.items)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
        return self.items[idx]
49
+
50
+
51
def load_samples() -> List[ECGSample]:
    """
    Fetch every ECGSample row from the configured database.

    Creates the tables first so a fresh database yields an empty list
    instead of an error.

    Returns:
        All rows as a list (possibly empty).
    """
    engine = create_engine(settings.DATABASE_URL, future=True)
    SessionLocal = sessionmaker(bind=engine)
    Base.metadata.create_all(bind=engine)

    try:
        with SessionLocal() as session:
            result = session.execute(select(ECGSample))
            rows = result.scalars().all()
    finally:
        # Dispose even when the query raises so the pool's connections
        # are not leaked.
        engine.dispose()
    return list(rows)
66
+
67
+
68
def train_model(dataset: Dataset, epochs: int = 3, batch_size: int = 8, lr: float = 1e-3) -> ECGClassifier:
    """
    Train the baseline CNN on *dataset* with Adam and cross-entropy loss.

    Prints the mean per-sample loss each epoch and returns the model
    switched to eval mode.
    """
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    model = ECGClassifier(num_classes=len(LABEL_TO_IDX)).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()
    for epoch_idx in range(1, epochs + 1):
        loss_sum = 0.0
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), targets)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is per-sample.
            loss_sum += loss.item() * inputs.size(0)
        print(f"Epoch {epoch_idx}/{epochs} - loss: {loss_sum / max(len(dataset), 1):.4f}")

    model.eval()
    return model
91
+
92
+
93
def save_weights(model: ECGClassifier) -> str:
    """
    Persist the model's state dict to the configured weights path.

    Falls back to ``./checkpoints/ecg_classifier.pt`` when
    ``settings.MODEL_WEIGHTS_PATH`` is unset.

    Returns:
        The path the weights were written to.
    """
    path = settings.MODEL_WEIGHTS_PATH or "./checkpoints/ecg_classifier.pt"
    directory = os.path.dirname(path)
    if directory:
        # os.makedirs("") raises FileNotFoundError, so only create a
        # directory when the path actually contains one.
        os.makedirs(directory, exist_ok=True)
    torch.save(model.state_dict(), path)
    return path
101
+
102
+
103
def build_dataloader(dataset: Dataset, batch_size: int = 8) -> DataLoader:
    """Wrap *dataset* in a shuffling DataLoader."""
    return DataLoader(dataset, shuffle=True, batch_size=batch_size)
105
+
106
+
107
def generate_synthetic_samples() -> List[ECGSample]:
    """
    Build a tiny two-sample synthetic dataset (not persisted) so a
    minimal training run works against an empty database.
    """
    import math

    class SyntheticSample:
        # Duck-types ECGSample: only .signal and .label are read downstream.
        def __init__(self, signal: List[float], label: str):
            self.signal = signal
            self.label = label

    times = [i / 50.0 for i in range(256)]
    low_amp = [0.05 * math.sin(2 * math.pi * ts) for ts in times]
    mixed = [0.3 * math.sin(2 * math.pi * ts * 3) + 0.1 * math.sin(2 * math.pi * ts * 7) for ts in times]
    return [
        SyntheticSample(low_amp, "normal"),
        SyntheticSample(mixed, "arrhythmia"),
    ]
125
+
126
+
127
def main() -> None:
    """
    Train the ECG classifier end-to-end and save its weights.

    Loads samples from the DB (falling back to a tiny synthetic set),
    builds the dataset, trains either via Adaptive Sparse Training when
    the optional package imported successfully or via the plain loop
    otherwise, then writes the resulting weights to disk.
    """
    samples = load_samples()
    if not samples:
        print("No ECG samples found in the database. Using synthetic samples for a minimal run.")
        samples = generate_synthetic_samples()

    # Pad/truncate everything to the longest signal present.
    max_len = max(len(sample.signal or []) for sample in samples)
    if max_len == 0:
        print("ECG samples contain empty signals; cannot train.")
        return

    dataset = ECGDataset(samples, max_len=max_len)
    if len(dataset) == 0:
        print("Dataset is empty after filtering; cannot train.")
        return

    train_loader = build_dataloader(dataset)
    model = ECGClassifier(num_classes=len(LABEL_TO_IDX)).to(device)

    if AST_TRAINER and AST_CONFIG:
        # NOTE(review): the config values below are tuning choices for the
        # optional adaptive-sparse-training package — confirm against its docs.
        cfg = AST_CONFIG(
            target_activation_rate=0.4,
            initial_threshold=2.5,
            adapt_kp=0.005,
            adapt_ki=0.0001,
            ema_alpha=0.1,
            energy_per_activation=1.0,
            energy_per_skip=0.01,
            use_amp=False,  # CPU-only by default here
            device=device.type,
        )
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
        # reduction="none" keeps per-sample losses — presumably what the AST
        # trainer expects; verify against the package API.
        criterion = nn.CrossEntropyLoss(reduction="none")
        # NOTE(review): train_loader is reused as the validation loader here.
        trainer = AST_TRAINER(model, train_loader, train_loader, cfg, optimizer=optimizer, criterion=criterion)
        trainer.train(epochs=3, warmup_epochs=0)
        print("Adaptive Sparse Training completed.")
    else:
        if AST_ERROR:
            print(f"Adaptive Sparse Training not active (optional): {AST_ERROR}")
        model = train_model(dataset)

    weights_path = save_weights(model)
    print(f"Training complete. Weights saved to {weights_path}")
170
+
171
+
172
if __name__ == "__main__":
    # Script entry point: allows direct execution / `python -m` invocation.
    main()