Spaces:

Bachstelze
/

pose-deep-learning

Running

App Files Files Community

Bachstelze committed 20 days ago

Commit

73f28de

1 Parent(s): 94ac6b0

readd keras models

Browse files

Files changed (13) hide show

A13/A13_DeepLearning_Report.ipynb +3 -0
A13/dl_models/__init__.py +1 -0
A13/dl_models/data_loader.py +108 -0
A13/dl_models/evaluate.py +68 -0
A13/dl_models/models.py +121 -0
A13/dl_models/predict.py +120 -0
A13/dl_models/saved/A_CNN.keras +3 -0
A13/dl_models/saved/A_Dense.keras +3 -0
A13/dl_models/saved/B_CNN.keras +3 -0
A13/dl_models/saved/B_Dense.keras +3 -0
A13/dl_models/saved/cv_summary.json +98 -0
A13/dl_models/saved/training_summary.json +56 -0
A13/dl_models/train.py +187 -0

A13/A13_DeepLearning_Report.ipynb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2fc1b5f814669ca0e4161b02a6aa29bede0e249da41695bd08473f7ce8088640
+size 100045

A13/dl_models/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Deep Learning models for Issue #10 (problems A and B, Dense and CNN)."""

A13/dl_models/data_loader.py ADDED Viewed

	@@ -0,0 +1,108 @@

+"""Load the prepared classification data produced in Issue #9.
+The directory ``A13/classification_problems/prepared_data`` contains:
+* ``{P}_{M}_train_X.npy``       original train features
+* ``{P}_{M}_train_y.npy``       original train labels
+* ``{P}_{M}_train_aug_X.npy``   augmented train features (incl. originals)
+* ``{P}_{M}_train_aug_y.npy``   augmented train labels
+* ``{P}_{M}_test_X.npy``        held-out test features
+* ``{P}_{M}_test_y.npy``        held-out test labels
+* ``{P}_{M}_*_filenames.npy``   the source clip name (used to keep all
+                                augmentations of one clip in the same CV fold)
+with ``P in {A, B}`` and ``M in {Dense, CNN}``.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+import numpy as np
+# Resolve the prepared_data directory relative to this file so that the
+# package works no matter from where the notebook / script is launched.
+# Both paths are computed once, at import time; ``_load`` reads every array
+# from DATA_DIR.
+_THIS_DIR = Path(__file__).resolve().parent
+DATA_DIR = (_THIS_DIR.parent / "classification_problems" / "prepared_data").resolve()
+@dataclass
+class Dataset:
+    """Container holding all arrays for one (problem, model) combination.
+    Instances are normally produced by ``load_dataset``; each field mirrors one
+    of the ``{P}_{M}_*.npy`` files described in the module docstring.
+    """
+    problem: str          # "A" or "B"
+    model_kind: str       # "Dense" or "CNN"
+    X_train: np.ndarray   # original (un-augmented) train features
+    y_train: np.ndarray   # labels matching X_train
+    X_train_aug: np.ndarray  # augmented train features (used for fitting)
+    y_train_aug: np.ndarray  # labels matching X_train_aug
+    train_groups: np.ndarray  # source-clip id per augmented train sample
+    X_test: np.ndarray    # held-out test features
+    y_test: np.ndarray    # labels matching X_test
+    test_filenames: np.ndarray  # contents of the ``*_test_filenames`` file
+    @property
+    def input_shape(self) -> tuple[int, ...]:
+        """Shape of a single augmented training sample (all axes but the first)."""
+        return self.X_train_aug.shape[1:]
+    def summary(self) -> str:
+        """Return a one-line description of the dataset.
+        Reports the array shapes of the augmented train set and the test set,
+        plus ``sum(labels)/len(labels)`` for both splits (the positive count
+        over the sample count when labels are 0/1).
+        """
+        return (
+            f"Problem {self.problem} / {self.model_kind}: "
+            f"train_aug={self.X_train_aug.shape}, "
+            f"test={self.X_test.shape}, "
+            f"pos_train={int(self.y_train_aug.sum())}/{len(self.y_train_aug)}, "
+            f"pos_test={int(self.y_test.sum())}/{len(self.y_test)}"
+        )
+def _load(name: str) -> np.ndarray:
+    """Load ``DATA_DIR/<name>.npy`` and return the stored array."""
+    path = DATA_DIR / f"{name}.npy"
+    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, and
+    # unpickling can execute arbitrary code -- only use with trusted files.
+    # Presumably required for the object-dtype filename arrays; confirm, and
+    # consider disabling it for the numeric X / y arrays.
+    return np.load(path, allow_pickle=True)
+# Augmentation suffixes appended to source-clip filenames in the prepared data.
+# The CV must group all augmented copies of one source clip together, so we
+# strip these suffixes to recover the original clip id (e.g. ``A1_mirror`` -> ``A1``).
+_AUG_SUFFIXES = ("_mirror", "_rotate_pos", "_rotate_neg", "_stretch")
+def _source_clip_ids(filenames: np.ndarray) -> np.ndarray:
+    out = np.empty(len(filenames), dtype=object)
+    for i, name in enumerate(filenames):
+        s = str(name)
+        for suf in _AUG_SUFFIXES:
+            if s.endswith(suf):
+                s = s[: -len(suf)]
+                break
+        out[i] = s
+    return out
+def load_dataset(problem: str, model_kind: str) -> Dataset:
+    """Load arrays for problem ``A``/``B`` and ``Dense``/``CNN``."""
+    if problem not in {"A", "B"}:
+        raise ValueError(f"problem must be 'A' or 'B', got {problem!r}")
+    if model_kind not in {"Dense", "CNN"}:
+        raise ValueError(f"model_kind must be 'Dense' or 'CNN', got {model_kind!r}")
+    prefix = f"{problem}_{model_kind}"
+    return Dataset(
+        problem=problem,
+        model_kind=model_kind,
+        X_train=_load(f"{prefix}_train_X").astype("float32"),
+        y_train=_load(f"{prefix}_train_y").astype("int32"),
+        X_train_aug=_load(f"{prefix}_train_aug_X").astype("float32"),
+        y_train_aug=_load(f"{prefix}_train_aug_y").astype("int32"),
+        train_groups=_source_clip_ids(_load(f"{prefix}_train_aug_filenames")),
+        X_test=_load(f"{prefix}_test_X").astype("float32"),
+        y_test=_load(f"{prefix}_test_y").astype("int32"),
+        test_filenames=_load(f"{prefix}_test_filenames"),
+    )
+def load_all() -> dict[tuple[str, str], Dataset]:
+    """Convenience helper returning the four datasets keyed by (problem, kind)."""
+    return {(p, m): load_dataset(p, m) for p in ("A", "B") for m in ("Dense", "CNN")}

A13/dl_models/evaluate.py ADDED Viewed

	@@ -0,0 +1,68 @@

+"""Evaluation helpers (confusion matrix, metrics tables, plots)."""
+from __future__ import annotations
+from typing import Iterable
+import numpy as np
+import tensorflow as tf
+from sklearn.metrics import (
+    accuracy_score,
+    confusion_matrix,
+    precision_score,
+    recall_score,
+    roc_auc_score,
+)
+# Metric names in report order: ``metrics_from_predictions`` returns exactly
+# these keys and ``metrics_table`` uses the list to select and order columns.
+METRIC_KEYS = ["tp", "fp", "tn", "fn", "accuracy", "precision", "recall", "auc"]
+def predict_proba(model: tf.keras.Model, X: np.ndarray) -> np.ndarray:
+    """Run ``model.predict`` silently on ``X`` and flatten the result to 1-D."""
+    return model.predict(X, verbose=0).reshape(-1)
+def metrics_from_predictions(
+    y_true: np.ndarray, y_proba: np.ndarray, threshold: float = 0.5
+) -> dict[str, float]:
+    """Compute confusion counts and summary metrics from predicted scores.
+    Scores ``>= threshold`` are classified as 1, everything else as 0.
+    Returns a dict with integer ``tp``/``fp``/``tn``/``fn`` plus ``accuracy``,
+    ``precision``, ``recall`` and ``auc``. ``auc`` is NaN when ``y_true``
+    contains only one distinct value, since ``roc_auc_score`` is skipped then.
+    """
+    y_pred = (y_proba >= threshold).astype(int)
+    # labels=[0, 1] pins the matrix to 2x2 even when one class never occurs.
+    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
+    # Rows are true labels, columns predictions, so ravel() is tn, fp, fn, tp.
+    tn, fp, fn, tp = cm.ravel()
+    auc = float("nan")
+    if len(np.unique(y_true)) > 1:
+        auc = roc_auc_score(y_true, y_proba)
+    return {
+        "tp": int(tp), "fp": int(fp), "tn": int(tn), "fn": int(fn),
+        "accuracy": accuracy_score(y_true, y_pred),
+        # zero_division=0: yield 0 when the denominator of the metric is empty.
+        "precision": precision_score(y_true, y_pred, zero_division=0),
+        "recall": recall_score(y_true, y_pred, zero_division=0),
+        "auc": auc,
+    }
+def confusion(y_true: np.ndarray, y_proba: np.ndarray, threshold: float = 0.5) -> np.ndarray:
+    return confusion_matrix(y_true, (y_proba >= threshold).astype(int), labels=[0, 1])
+def plot_confusion(cm: np.ndarray, title: str, ax=None):
+    """Draw a 2x2 confusion matrix as an annotated heat map and return the axes.
+    ``cm`` is indexed ``cm[true, predicted]``; index 0 is labelled "bad" and
+    index 1 "good". When ``ax`` is None a new 3x3 inch figure is created.
+    """
+    # Imported lazily so the module can be used without matplotlib installed.
+    import matplotlib.pyplot as plt
+    if ax is None:
+        _, ax = plt.subplots(figsize=(3, 3))
+    ax.imshow(cm, cmap="Blues")
+    ax.set_xticks([0, 1]); ax.set_yticks([0, 1])
+    ax.set_xticklabels(["bad", "good"]); ax.set_yticklabels(["bad", "good"])
+    ax.set_xlabel("predicted"); ax.set_ylabel("true")
+    ax.set_title(title)
+    for i in range(2):
+        for j in range(2):
+            # x is the column (prediction), y the row (truth); use white text
+            # on the darker cells so the count stays readable.
+            ax.text(j, i, int(cm[i, j]), ha="center", va="center",
+                    color="white" if cm[i, j] > cm.max() / 2 else "black")
+    return ax
+def metrics_table(rows: Iterable[dict], index: Iterable[str]):
+    import pandas as pd
+    df = pd.DataFrame(list(rows), index=list(index))
+    cols = [c for c in METRIC_KEYS if c in df.columns]
+    return df[cols].round(4)

A13/dl_models/models.py ADDED Viewed

	@@ -0,0 +1,121 @@

+"""Model factories for Issue #10.
+Two architectures are provided per problem:
+* :func:`build_dense` -- multi-layer perceptron over the flattened sequence.
+* :func:`build_cnn`   -- small Conv2D-over-(time, joint) network. The default
+  hyper-parameters were chosen so that the CNN has at most ~20 % of the
+  parameters of the Dense baseline (verified by :func:`assert_param_budget`).
+All models output a single sigmoid probability (good=1 / bad=0) and are compiled
+with ``binary_crossentropy`` plus the metrics required by issue #10:
+True/False Positives & Negatives, AUC, BinaryAccuracy, Precision, Recall.
+"""
+from __future__ import annotations
+from typing import Sequence
+import tensorflow as tf
+from tensorflow.keras import layers, models, regularizers
+# --------------------------------------------------------------------------- #
+# Metrics & compile helper                                                    #
+# --------------------------------------------------------------------------- #
+def make_metrics() -> list[tf.keras.metrics.Metric]:
+    """Return fresh Keras metric objects for one model compilation.
+    The ``name`` of each metric is the key under which it appears in the
+    training history and in ``model.evaluate(..., return_dict=True)``.
+    """
+    return [
+        tf.keras.metrics.TruePositives(name="tp"),
+        tf.keras.metrics.FalsePositives(name="fp"),
+        tf.keras.metrics.TrueNegatives(name="tn"),
+        tf.keras.metrics.FalseNegatives(name="fn"),
+        tf.keras.metrics.BinaryAccuracy(name="accuracy"),
+        tf.keras.metrics.Precision(name="precision"),
+        tf.keras.metrics.Recall(name="recall"),
+        tf.keras.metrics.AUC(name="auc"),
+    ]
+def compile_model(model: tf.keras.Model, learning_rate: float = 1e-3) -> tf.keras.Model:
+    """Compile ``model`` in place with Adam, binary cross-entropy and the
+    metrics from ``make_metrics``, then return the same model object."""
+    model.compile(
+        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
+        loss="binary_crossentropy",
+        metrics=make_metrics(),
+    )
+    return model
+# --------------------------------------------------------------------------- #
+# Architectures                                                               #
+# --------------------------------------------------------------------------- #
+def build_dense(
+    input_dim: int,
+    hidden_units: Sequence[int] = (128, 64, 32),
+    dropout: float = 0.3,
+    l2: float = 1e-4,
+    learning_rate: float = 1e-3,
+    name: str = "dense",
+) -> tf.keras.Model:
+    """MLP for flattened sequences (Dense approach).
+    Layout: BatchNormalization on the raw input, then one ReLU ``Dense`` layer
+    per entry of ``hidden_units`` (each followed by ``Dropout`` when
+    ``dropout`` is non-zero), then a single sigmoid output unit.
+    ``l2`` is the kernel-regularisation factor of the hidden layers; a falsy
+    value disables regularisation. The returned model is already compiled
+    via ``compile_model`` with ``learning_rate``.
+    """
+    reg = regularizers.l2(l2) if l2 else None
+    inputs = layers.Input(shape=(input_dim,), name="features")
+    # Normalise inside the model so callers can feed the features as loaded.
+    x = layers.BatchNormalization()(inputs)
+    for i, units in enumerate(hidden_units):
+        x = layers.Dense(units, activation="relu", kernel_regularizer=reg, name=f"fc{i+1}")(x)
+        if dropout:
+            x = layers.Dropout(dropout)(x)
+    # The output layer is intentionally left unregularised.
+    output = layers.Dense(1, activation="sigmoid", name="prob")(x)
+    return compile_model(models.Model(inputs, output, name=name), learning_rate)
+def build_cnn(
+    input_shape: tuple[int, int, int],
+    filters: Sequence[int] = (8, 16),
+    kernel_size: tuple[int, int] = (3, 3),
+    dense_units: int = 16,
+    dropout: float = 0.3,
+    l2: float = 1e-4,
+    learning_rate: float = 1e-3,
+    name: str = "cnn",
+) -> tf.keras.Model:
+    """Compact 2D CNN over (time, joint, coordinate) tensors.
+    The default ``filters`` and ``dense_units`` produce <20 % of the Dense
+    baseline's parameters for both problem A and problem B.
+    Layout: BatchNormalization, then one ``Conv2D`` + ``MaxPool2D`` pair per
+    entry of ``filters``, global average pooling, an optional ReLU ``Dense``
+    head (skipped when ``dense_units`` is falsy; its ``Dropout`` is skipped
+    when ``dropout`` is falsy) and a single sigmoid output unit. A falsy
+    ``l2`` disables kernel regularisation. The returned model is already
+    compiled via ``compile_model`` with ``learning_rate``.
+    """
+    reg = regularizers.l2(l2) if l2 else None
+    inputs = layers.Input(shape=input_shape, name="sequence")
+    x = layers.BatchNormalization()(inputs)
+    for i, f in enumerate(filters):
+        # padding="same" keeps both spatial axes unchanged through the conv.
+        x = layers.Conv2D(
+            f, kernel_size=kernel_size, padding="same", activation="relu",
+            kernel_regularizer=reg, name=f"conv{i+1}",
+        )(x)
+        # only pool on the time axis; joint axis is small (13).
+        x = layers.MaxPool2D(pool_size=(2, 1), name=f"pool{i+1}")(x)
+    # Collapse both spatial axes so the head size depends only on filters[-1].
+    x = layers.GlobalAveragePooling2D(name="gap")(x)
+    if dense_units:
+        x = layers.Dense(dense_units, activation="relu", kernel_regularizer=reg, name="fc")(x)
+        if dropout:
+            x = layers.Dropout(dropout)(x)
+    output = layers.Dense(1, activation="sigmoid", name="prob")(x)
+    return compile_model(models.Model(inputs, output, name=name), learning_rate)
+# --------------------------------------------------------------------------- #
+# Parameter budget                                                            #
+# --------------------------------------------------------------------------- #
+def count_params(model: tf.keras.Model) -> int:
+    """Return ``model.count_params()`` as a plain ``int``."""
+    return int(model.count_params())
+def assert_param_budget(dense: tf.keras.Model, cnn: tf.keras.Model, ratio: float = 0.20) -> None:
+    """Raise if the CNN exceeds ``ratio`` × Dense parameter count."""
+    d, c = count_params(dense), count_params(cnn)
+    if c > ratio * d:
+        raise AssertionError(
+            f"CNN has {c} parameters which exceeds {ratio:.0%} of Dense's {d} "
+            f"({c / d:.1%}). Reduce CNN filters/dense_units."
+        )

A13/dl_models/predict.py ADDED Viewed

	@@ -0,0 +1,120 @@

+"""Inference helpers and a small CLI so the model is easy to (re)use.
+Examples
+--------
+Train + save all four models::
+    python -m A13.dl_models.predict train --out A13/dl_models/saved
+Predict on a NumPy array of features::
+    python -m A13.dl_models.predict run --model A13/dl_models/saved/A_Dense.keras \\
+                                        --X my_features.npy
+"""
+from __future__ import annotations
+import argparse
+import json
+from pathlib import Path
+import numpy as np
+import tensorflow as tf
+from .data_loader import load_all, load_dataset
+from .models import build_dense, build_cnn, count_params, assert_param_budget
+from .train import train_final
+from .evaluate import predict_proba, metrics_from_predictions
+# Default output directory for trained models: the ``saved`` folder next to
+# this module, independent of the current working directory.
+SAVED_DIR = Path(__file__).resolve().parent / "saved"
+def _builders(dataset):
+    if dataset.model_kind == "Dense":
+        return lambda: build_dense(input_dim=dataset.input_shape[0], name=f"{dataset.problem}_Dense")
+    return lambda: build_cnn(input_shape=dataset.input_shape, name=f"{dataset.problem}_CNN")
+def train_all(out_dir: Path = SAVED_DIR, epochs: int = 120, verbose: int = 1) -> dict:
+    """Train Dense + CNN for both problems and save them.
+    Also asserts the CNN parameter budget (<= 20% of Dense) per problem.
+    Each model is written to ``out_dir/{problem}_{kind}.keras`` and the
+    returned summary (parameter counts plus test metrics per model) is also
+    written to ``out_dir/training_summary.json``. The budget check runs before
+    any training, so a violation aborts without fitting a model.
+    """
+    out_dir = Path(out_dir); out_dir.mkdir(parents=True, exist_ok=True)
+    datasets = load_all()
+    summary: dict[str, dict] = {}
+    # --- parameter budget check ------------------------------------------------
+    for problem in ("A", "B"):
+        # Throw-away models with default hyper-parameters, built only to be counted.
+        d = build_dense(input_dim=datasets[(problem, "Dense")].input_shape[0])
+        c = build_cnn(input_shape=datasets[(problem, "CNN")].input_shape)
+        assert_param_budget(d, c, ratio=0.20)
+        summary[f"{problem}_param_counts"] = {
+            "dense": count_params(d), "cnn": count_params(c),
+            "ratio": count_params(c) / count_params(d),
+        }
+    # --- train + save ----------------------------------------------------------
+    for (problem, kind), dataset in datasets.items():
+        if verbose:
+            print(f"== training {problem} / {kind} ==  {dataset.summary()}")
+        result = train_final(
+            dataset, _builders(dataset), epochs=epochs, verbose=verbose,
+            save_path=out_dir / f"{problem}_{kind}.keras",
+        )
+        summary[f"{problem}_{kind}_test_metrics"] = result.test_metrics
+    (out_dir / "training_summary.json").write_text(json.dumps(summary, indent=2))
+    return summary
+def predict(model_path: Path | str, X: np.ndarray, threshold: float = 0.5):
+    """Load a saved model and score ``X``.
+    Returns ``(proba, labels)``: the flattened model outputs and the 0/1
+    labels obtained with ``proba >= threshold``. The model is loaded from
+    disk on every call.
+    """
+    model = tf.keras.models.load_model(model_path)
+    proba = predict_proba(model, X)
+    return proba, (proba >= threshold).astype(int)
+def evaluate_saved(model_path: Path | str, problem: str, model_kind: str) -> dict:
+    """Re-evaluate a saved model on the official held-out test set.
+    Loads the dataset for ``problem`` / ``model_kind``, scores its ``X_test``
+    with the model at ``model_path`` and returns the dict produced by
+    ``metrics_from_predictions`` at the default 0.5 threshold.
+    """
+    ds = load_dataset(problem, model_kind)
+    proba, _ = predict(model_path, ds.X_test)
+    return metrics_from_predictions(ds.y_test, proba)
+# --------------------------------------------------------------------------- #
+# CLI                                                                          #
+# --------------------------------------------------------------------------- #
+def _cli() -> None:
+    """Command-line entry point with three sub-commands.
+    * ``train`` -- train and save all four models, print the summary as JSON.
+    * ``eval``  -- print the test-set metrics of one saved model as JSON.
+    * ``run``   -- score a ``.npy`` feature array and print one
+      ``probability<TAB>label`` line per sample.
+    """
+    parser = argparse.ArgumentParser(description="Train / use Issue #10 models.")
+    sub = parser.add_subparsers(dest="cmd", required=True)
+    p_train = sub.add_parser("train", help="Train all four models.")
+    p_train.add_argument("--out", default=str(SAVED_DIR))
+    p_train.add_argument("--epochs", type=int, default=120)
+    p_eval = sub.add_parser("eval", help="Evaluate a saved model on its test set.")
+    p_eval.add_argument("--model", required=True)
+    p_eval.add_argument("--problem", required=True, choices=["A", "B"])
+    p_eval.add_argument("--kind", required=True, choices=["Dense", "CNN"])
+    p_run = sub.add_parser("run", help="Run inference on a .npy feature array.")
+    p_run.add_argument("--model", required=True)
+    p_run.add_argument("--X", required=True)
+    p_run.add_argument("--threshold", type=float, default=0.5)
+    args = parser.parse_args()
+    if args.cmd == "train":
+        summary = train_all(Path(args.out), epochs=args.epochs)
+        print(json.dumps(summary, indent=2))
+    elif args.cmd == "eval":
+        print(json.dumps(evaluate_saved(args.model, args.problem, args.kind), indent=2))
+    elif args.cmd == "run":
+        # np.load keeps its default allow_pickle=False here, so object arrays
+        # in a user-supplied file are rejected.
+        X = np.load(args.X)
+        proba, pred = predict(args.model, X, threshold=args.threshold)
+        for p, q in zip(proba, pred):
+            print(f"{p:.4f}\t{int(q)}")
+# Run the CLI only when executed as a script / via ``python -m``.
+if __name__ == "__main__":
+    _cli()

A13/dl_models/saved/A_CNN.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2b94a87774a05ecd05cacf4bc3c04d3636ed9853d1467c1926bc86f6a362e6d
+size 71378

A13/dl_models/saved/A_Dense.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:58e5873c14fb6f1b30c3df7e1f3a14b60700b1c66fd34fc1463d2d0513caf683
+size 785808

A13/dl_models/saved/B_CNN.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5f4cc63783b17d5c406ba06c5bc5de7b06dd2254b43a8f7037080c715552eae
+size 70482

A13/dl_models/saved/B_Dense.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01835e1f426ddd3b160a3162d601f93d969ba7789edd2323c7d502e6fdb1c809
+size 581971

A13/dl_models/saved/cv_summary.json ADDED Viewed

	@@ -0,0 +1,98 @@

+{
+  "A_Dense": {
+    "mean": {
+      "accuracy": 1.0,
+      "auc": 1.0,
+      "fn": 0.0,
+      "fp": 0.0,
+      "loss": 0.029864558018743992,
+      "precision": 1.0,
+      "recall": 1.0,
+      "tn": 17.0,
+      "tp": 28.5
+    },
+    "std": {
+      "accuracy": 0.0,
+      "auc": 0.0,
+      "fn": 0.0,
+      "fp": 0.0,
+      "loss": 0.0003843123911400311,
+      "precision": 0.0,
+      "recall": 0.0,
+      "tn": 1.0,
+      "tp": 0.9219544457292888
+    }
+  },
+  "A_CNN": {
+    "mean": {
+      "accuracy": 0.9274879336357117,
+      "auc": 0.9561430215835571,
+      "fn": 3.3,
+      "fp": 0.0,
+      "loss": 0.19682206511497496,
+      "precision": 1.0,
+      "recall": 0.8842802405357361,
+      "tn": 17.0,
+      "tp": 25.2
+    },
+    "std": {
+      "accuracy": 0.01719623343302091,
+      "auc": 0.025368922288748794,
+      "fn": 0.7810249675906654,
+      "fp": 0.0,
+      "loss": 0.028594990244631205,
+      "precision": 0.0,
+      "recall": 0.026408634136469537,
+      "tn": 1.0,
+      "tp": 1.0770329614269007
+    }
+  },
+  "B_Dense": {
+    "mean": {
+      "accuracy": 0.997826087474823,
+      "auc": 1.0,
+      "fn": 0.1,
+      "fp": 0.0,
+      "loss": 0.03609825950115919,
+      "precision": 1.0,
+      "recall": 0.9965517222881317,
+      "tn": 17.0,
+      "tp": 28.4
+    },
+    "std": {
+      "accuracy": 0.006521737575531006,
+      "auc": 0.0,
+      "fn": 0.30000000000000004,
+      "fp": 0.0,
+      "loss": 0.014322467489723906,
+      "precision": 0.0,
+      "recall": 0.010344833135604858,
+      "tn": 1.0,
+      "tp": 0.9165151389911681
+    }
+  },
+  "B_CNN": {
+    "mean": {
+      "accuracy": 0.9274879336357117,
+      "auc": 0.9576378405094147,
+      "fn": 3.3,
+      "fp": 0.0,
+      "loss": 0.19666245728731155,
+      "precision": 1.0,
+      "recall": 0.8842802405357361,
+      "tn": 17.0,
+      "tp": 25.2
+    },
+    "std": {
+      "accuracy": 0.01719623343302091,
+      "auc": 0.02392501041855642,
+      "fn": 0.7810249675906654,
+      "fp": 0.0,
+      "loss": 0.027519047326329205,
+      "precision": 0.0,
+      "recall": 0.026408634136469537,
+      "tn": 1.0,
+      "tp": 1.0770329614269007
+    }
+  }
+}

A13/dl_models/saved/training_summary.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "A_param_counts": {
+    "dense": 61977,
+    "cnn": 1693,
+    "ratio": 0.02731658518482663
+  },
+  "B_param_counts": {
+    "dense": 44817,
+    "cnn": 1617,
+    "ratio": 0.03608005890621862
+  },
+  "A_Dense_test_metrics": {
+    "accuracy": 0.9130434989929199,
+    "auc": 0.9365079402923584,
+    "fn": 1.0,
+    "fp": 1.0,
+    "loss": 0.5623016953468323,
+    "precision": 0.9285714030265808,
+    "recall": 0.9285714030265808,
+    "tn": 8.0,
+    "tp": 13.0
+  },
+  "A_CNN_test_metrics": {
+    "accuracy": 0.95652174949646,
+    "auc": 0.964285671710968,
+    "fn": 1.0,
+    "fp": 0.0,
+    "loss": 0.15785124897956848,
+    "precision": 1.0,
+    "recall": 0.9285714030265808,
+    "tn": 9.0,
+    "tp": 13.0
+  },
+  "B_Dense_test_metrics": {
+    "accuracy": 0.9130434989929199,
+    "auc": 0.9365079402923584,
+    "fn": 1.0,
+    "fp": 1.0,
+    "loss": 0.6157440543174744,
+    "precision": 0.9285714030265808,
+    "recall": 0.9285714030265808,
+    "tn": 8.0,
+    "tp": 13.0
+  },
+  "B_CNN_test_metrics": {
+    "accuracy": 0.95652174949646,
+    "auc": 0.9722222685813904,
+    "fn": 1.0,
+    "fp": 0.0,
+    "loss": 0.15502068400382996,
+    "precision": 1.0,
+    "recall": 0.9285714030265808,
+    "tn": 9.0,
+    "tp": 13.0
+  }
+}

A13/dl_models/train.py ADDED Viewed

	@@ -0,0 +1,187 @@

+"""Training and cross-validation utilities for Issue #10.
+Supports:
+* a single train/test fit (``train_final``)
+* 10-fold *grouped* cross-validation that keeps all augmentations of the same
+  original clip in the same fold (``cross_validate``)
+* small grid search over a few hyper-parameter combinations (``grid_search``)
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from itertools import product
+from pathlib import Path
+from typing import Callable, Iterable
+import numpy as np
+import tensorflow as tf
+from sklearn.model_selection import GroupKFold
+from .data_loader import Dataset
+from . import models as _models
+# --------------------------------------------------------------------------- #
+# Common training helpers                                                     #
+# --------------------------------------------------------------------------- #
+def _callbacks(patience: int = 15) -> list[tf.keras.callbacks.Callback]:
+    """Return new callback instances shared by all training runs.
+    * EarlyStopping on ``val_loss`` with the given ``patience``, restoring
+      the best weights seen.
+    * ReduceLROnPlateau on ``val_loss``: halves the learning rate after
+      ``max(3, patience // 3)`` epochs without improvement, never below 1e-5.
+    """
+    return [
+        tf.keras.callbacks.EarlyStopping(
+            monitor="val_loss", patience=patience, restore_best_weights=True
+        ),
+        tf.keras.callbacks.ReduceLROnPlateau(
+            monitor="val_loss", factor=0.5, patience=max(3, patience // 3), min_lr=1e-5
+        ),
+    ]
+def class_weight(y: np.ndarray) -> dict[int, float]:
+    pos = float(y.sum())
+    neg = float(len(y) - pos)
+    if pos == 0 or neg == 0:
+        return {0: 1.0, 1: 1.0}
+    total = pos + neg
+    return {0: total / (2 * neg), 1: total / (2 * pos)}
+def _evaluate(model: tf.keras.Model, X: np.ndarray, y: np.ndarray) -> dict[str, float]:
+    """Evaluate ``model`` on ``(X, y)`` and return its results as plain floats,
+    keyed by name."""
+    # return_dict=True yields a name -> value mapping instead of a positional list.
+    out = model.evaluate(X, y, verbose=0, return_dict=True)
+    return {k: float(v) for k, v in out.items()}
+# --------------------------------------------------------------------------- #
+# Grouped cross-validation                                                    #
+# --------------------------------------------------------------------------- #
+@dataclass
+class CVResult:
+    """Outcome of one ``cross_validate`` run."""
+    fold_metrics: list[dict[str, float]]  # one metrics dict per fold, plus a "fold" number
+    mean: dict[str, float]  # per-metric mean over the folds ("fold" excluded)
+    std: dict[str, float]   # per-metric np.std over the folds ("fold" excluded)
+def cross_validate(
+    dataset: Dataset,
+    build_fn: Callable[[], tf.keras.Model],
+    n_splits: int = 10,
+    epochs: int = 80,
+    batch_size: int = 32,
+    use_class_weight: bool = True,
+    verbose: int = 0,
+) -> CVResult:
+    """Run grouped K-fold CV on ``dataset.X_train_aug``.
+    Splits use ``dataset.train_groups`` so all augmented copies of one
+    original clip stay in the same fold, as required by issue #10.
+    ``build_fn`` is called once per fold and must return a freshly compiled
+    model. ``n_splits`` is capped at the number of distinct groups. Returns
+    the per-fold metrics together with their mean and standard deviation.
+    """
+    X, y, groups = dataset.X_train_aug, dataset.y_train_aug, dataset.train_groups
+    n_splits = min(n_splits, len(np.unique(groups)))
+    gkf = GroupKFold(n_splits=n_splits)
+    fold_metrics: list[dict[str, float]] = []
+    for fold, (train_idx, val_idx) in enumerate(gkf.split(X, y, groups), start=1):
+        # Reset Keras' global state before building this fold's model.
+        tf.keras.backend.clear_session()
+        model = build_fn()
+        # Weights are computed from the training part of the fold only.
+        cw = class_weight(y[train_idx]) if use_class_weight else None
+        # NOTE(review): the validation fold drives early stopping / best-weight
+        # restoration AND is the fold scored below, so the reported CV metrics
+        # are optimistically biased.
+        model.fit(
+            X[train_idx], y[train_idx],
+            validation_data=(X[val_idx], y[val_idx]),
+            epochs=epochs, batch_size=batch_size,
+            callbacks=_callbacks(),
+            class_weight=cw,
+            verbose=verbose,
+        )
+        m = _evaluate(model, X[val_idx], y[val_idx])
+        m["fold"] = fold
+        fold_metrics.append(m)
+        if verbose:
+            print(f"  fold {fold:2d}: auc={m['auc']:.3f} acc={m['accuracy']:.3f}")
+    # Aggregate every metric except the bookkeeping "fold" index.
+    keys = [k for k in fold_metrics[0] if k != "fold"]
+    mean = {k: float(np.mean([f[k] for f in fold_metrics])) for k in keys}
+    std = {k: float(np.std([f[k] for f in fold_metrics])) for k in keys}
+    return CVResult(fold_metrics=fold_metrics, mean=mean, std=std)
+# --------------------------------------------------------------------------- #
+# Final fit on augmented training data (with a held-out validation split)     #
+# --------------------------------------------------------------------------- #
+@dataclass
+class TrainResult:
+    """Outcome of one ``train_final`` run."""
+    model: tf.keras.Model             # the fitted model
+    history: dict[str, list[float]]   # Keras ``history.history`` with values cast to float
+    test_metrics: dict[str, float]    # ``_evaluate`` results on the dataset's test split
+def train_final(
+    dataset: Dataset,
+    build_fn: Callable[[], tf.keras.Model],
+    epochs: int = 120,
+    batch_size: int = 32,
+    val_fraction: float = 0.15,
+    use_class_weight: bool = True,
+    verbose: int = 0,
+    save_path: Path | str | None = None,
+) -> TrainResult:
+    """Fit one model on the augmented train set and score it on the test set.
+    A single grouped split of the training data is held out as validation
+    data for early stopping; the remaining groups are used for fitting. The
+    model is evaluated on ``dataset.X_test`` / ``dataset.y_test`` and, when
+    ``save_path`` is given, saved there (parent directories are created).
+    Returns a ``TrainResult`` with the model, its training history and the
+    test metrics.
+    """
+    X, y, groups = dataset.X_train_aug, dataset.y_train_aug, dataset.train_groups
+    # Hold out a single grouped validation split for early stopping.
+    n_groups = len(np.unique(groups))
+    n_val = max(1, int(round(n_groups * val_fraction)))
+    # The first GroupKFold split is used, so the validation share is about
+    # 1 / n_splits of the groups and only approximates ``val_fraction``
+    # (e.g. 10 groups at 0.15 -> n_val=2 -> 5 splits -> 20 %).
+    gkf = GroupKFold(n_splits=max(2, n_groups // n_val))
+    train_idx, val_idx = next(iter(gkf.split(X, y, groups)))
+    tf.keras.backend.clear_session()
+    model = build_fn()
+    cw = class_weight(y[train_idx]) if use_class_weight else None
+    history = model.fit(
+        X[train_idx], y[train_idx],
+        validation_data=(X[val_idx], y[val_idx]),
+        epochs=epochs, batch_size=batch_size,
+        callbacks=_callbacks(),
+        class_weight=cw,
+        verbose=verbose,
+    )
+    test_metrics = _evaluate(model, dataset.X_test, dataset.y_test)
+    if save_path is not None:
+        save_path = Path(save_path)
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        model.save(save_path)
+    # Cast history values to plain floats so the result is easy to serialise.
+    return TrainResult(model=model, history={k: list(map(float, v)) for k, v in history.history.items()},
+                       test_metrics=test_metrics)
+# --------------------------------------------------------------------------- #
+# Tiny grid search                                                            #
+# --------------------------------------------------------------------------- #
+def grid_search(
+    dataset: Dataset,
+    build_fn_factory: Callable[..., Callable[[], tf.keras.Model]],
+    grid: dict[str, Iterable],
+    n_splits: int = 5,
+    epochs: int = 60,
+    batch_size: int = 32,
+    verbose: int = 0,
+) -> list[dict]:
+    """Simple grid search using grouped CV.
+    ``build_fn_factory(**hp)`` must return a zero-arg builder of a fresh model.
+    Returns a list of dicts sorted by mean validation AUC (best first).
+    """
+    keys = list(grid.keys())
+    results = []
+    for combo in product(*[grid[k] for k in keys]):
+        hp = dict(zip(keys, combo))
+        if verbose:
+            print(f"-> {hp}")
+        cv = cross_validate(
+            dataset, build_fn_factory(**hp),
+            n_splits=n_splits, epochs=epochs, batch_size=batch_size,
+            verbose=0,
+        )
+        results.append({"hp": hp, "mean": cv.mean, "std": cv.std})
+    results.sort(key=lambda r: r["mean"]["auc"], reverse=True)
+    return results