Spaces:
Running
Running
Miyu Horiuchi committed on
Commit ·
edf6713
1
Parent(s): 65e8e0f
Add tests for explore module (correlations + class means)
Browse files
4 tests verify:
- Strongest feature ranks first in feature_target_correlations
- Empty results when n < 30 or target missing
- Class-mean differences detect separating features
Total tests: 19/19 passing.
- tests/test_explore.py +62 -0
tests/test_explore.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for the feature/target exploration helpers."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
from microbe_model.explore import class_mean_features, feature_target_correlations
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _synthetic_with_signal(n: int = 200, seed: int = 0) -> pd.DataFrame:
    """Build a reproducible synthetic frame with one informative feature.

    Args:
        n: Number of rows to generate.
        seed: Seed for the NumPy random generator, so repeated calls with the
            same arguments return identical data.

    Returns:
        A frame with three standard-normal feature columns (``f_strong``,
        ``f_weak``, ``f_noise``) and an ``optimal_temperature_c`` target.
        Only ``f_strong`` enters the target; despite its name, ``f_weak`` is
        independent noise exactly like ``f_noise``. A random subset of the
        target values (about 5%) is replaced with NaN.
    """
    rng = np.random.default_rng(seed)
    # NOTE: the order of the rng calls below determines the generated values;
    # keep it stable so the fixture stays reproducible for a given seed.
    df = pd.DataFrame({
        "f_strong": rng.normal(size=n),
        "f_weak": rng.normal(size=n),
        "f_noise": rng.normal(size=n),
    })
    # Strong signal: target = 30 + 5 * f_strong + unit-variance noise.
    df["optimal_temperature_c"] = 30 + 5 * df["f_strong"] + rng.normal(scale=1, size=n)
    # Knock out a random ~5% of targets so callers see missing values.
    df.loc[rng.random(n) > 0.95, "optimal_temperature_c"] = np.nan
    return df
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def test_correlations_rank_strong_feature_first() -> None:
    """The feature that drives the target must head the correlation ranking."""
    df = _synthetic_with_signal()
    feats = ["f_strong", "f_weak", "f_noise"]

    corrs = feature_target_correlations(df, feats, "optimal_temperature_c", top_n=3)

    # All three candidate features fit within top_n=3.
    assert len(corrs) == 3
    # Strongest feature should be ranked first.
    top = corrs[0]
    assert top["feature"] == "f_strong", f"unexpected top feature: {top['feature']!r}"
    # Compare on magnitude so the check does not depend on the sign convention.
    assert abs(top["spearman_rho"]) > 0.5
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_correlations_skip_low_n() -> None:
    """Too few paired observations must produce an empty result."""
    df = pd.DataFrame({
        "f0": np.arange(10),
        # Only five non-NaN targets. They vary (and track f0 perfectly) so
        # that an empty result can only come from the sample-size guard; a
        # constant target would make the correlation undefined and let the
        # test pass even if the guard were missing.
        "y": [1.0, 2.0, 3.0, 4.0, 5.0] + [np.nan] * 5,
    })
    assert feature_target_correlations(df, ["f0"], "y", top_n=5) == []
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def test_correlations_skip_missing_target() -> None:
    """A target column absent from the frame must yield an empty list, not an error."""
    df = pd.DataFrame({"f0": [1.0, 2.0]})
    result = feature_target_correlations(df, ["f0"], "missing_target", top_n=5)
    assert result == []
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def test_class_means_orders_most_separating_first() -> None:
    """Features are ordered by how well their per-class means separate the classes."""
    rng = np.random.default_rng(0)
    n = 200
    cls = rng.choice(["a", "b", "c"], size=n)

    # Class centres sit 5 units apart (a -> 0, b -> 5, c -> 10) with small
    # jitter, so the per-class means of this feature are clearly distinct.
    centres = np.where(cls == "a", 0, np.where(cls == "b", 5, 10))
    separating = centres + rng.normal(scale=0.5, size=n)

    df = pd.DataFrame({
        "f_separating": separating,
        # Drawn independently of the class label.
        "f_useless": rng.normal(size=n),
        "target": cls,
    })

    out = class_mean_features(df, ["f_separating", "f_useless"], "target", top_features=2)

    assert len(out) == 2
    assert out[0]["feature"] == "f_separating"
    assert out[1]["feature"] == "f_useless"
    # Class means should differ substantially for f_separating.
    means = out[0]["means"]
    spread = max(means.values()) - min(means.values())
    assert spread > 5
|