Miyu Horiuchi committed on
Commit
edf6713
·
1 Parent(s): 65e8e0f

Add tests for explore module (correlations + class means)

Browse files

4 tests verify:
- Strongest feature ranks first in feature_target_correlations
- Empty results when n < 30 or target missing
- Class-mean differences detect separating features

Total tests: 19/19 passing.

Files changed (1) hide show
  1. tests/test_explore.py +62 -0
tests/test_explore.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the feature/target exploration helpers."""
2
+ from __future__ import annotations
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+ from microbe_model.explore import class_mean_features, feature_target_correlations
8
+
9
+
10
def _synthetic_with_signal(n: int = 200, seed: int = 0) -> pd.DataFrame:
    """Build a reproducible frame with one informative feature.

    The frame has three standard-normal feature columns (``f_strong``,
    ``f_weak``, ``f_noise``) and a numeric target ``optimal_temperature_c``
    equal to ``30 + 5 * f_strong`` plus unit-scale Gaussian noise.  Only
    ``f_strong`` enters the target; despite its name, ``f_weak`` is drawn
    independently of it.  Roughly 5% of the target values are then blanked
    out with NaN.

    Args:
        n: Number of rows to generate.
        seed: Seed for the NumPy random generator.

    Returns:
        The generated DataFrame with ``n`` rows and four columns.
    """
    rng = np.random.default_rng(seed)

    # Draw order matters for reproducibility: features first, in this order.
    feature_names = ("f_strong", "f_weak", "f_noise")
    frame = pd.DataFrame({name: rng.normal(size=n) for name in feature_names})

    # Strong linear signal on f_strong, then additive noise.
    signal = 30 + 5 * frame["f_strong"]
    frame["optimal_temperature_c"] = signal + rng.normal(scale=1, size=n)

    # Knock out a random subset of targets to simulate missing labels.
    missing = rng.random(n) > 0.95
    frame.loc[missing, "optimal_temperature_c"] = np.nan
    return frame
22
+
23
+
24
def test_correlations_rank_strong_feature_first() -> None:
    """The feature that drives the target must head the correlation ranking."""
    frame = _synthetic_with_signal()
    ranked = feature_target_correlations(
        frame,
        ["f_strong", "f_weak", "f_noise"],
        "optimal_temperature_c",
        top_n=3,
    )

    # All three candidate features are expected back when top_n=3.
    assert len(ranked) == 3

    # The head of the list is the feature the target was built from.
    best = ranked[0]
    assert best["feature"] == "f_strong"
    assert abs(best["spearman_rho"]) > 0.5
32
+
33
+
34
def test_correlations_skip_low_n() -> None:
    """Too few usable rows must yield an empty correlation list.

    Only five of the ten rows carry a target value, well below the minimum
    sample size the commit notes describe (n < 30).  The target values are
    deliberately non-constant: a constant column has an undefined rank
    correlation, so a constant target could produce an empty result even if
    the sample-size guard were missing, letting the test pass for the wrong
    reason.
    """
    df = pd.DataFrame({
        "f0": np.arange(10),
        # Varying target, perfectly rank-correlated with f0 on the valid rows,
        # so only the low-n rule can explain an empty result.
        "y": [1.0, 2.0, 3.0, 4.0, 5.0] + [np.nan] * 5,
    })
    assert feature_target_correlations(df, ["f0"], "y", top_n=5) == []
40
+
41
+
42
def test_correlations_skip_missing_target() -> None:
    """A target column absent from the frame must give an empty result."""
    frame = pd.DataFrame({"f0": [1.0, 2.0]})
    result = feature_target_correlations(frame, ["f0"], "missing_target", top_n=5)
    assert result == []
45
+
46
+
47
def test_class_means_orders_most_separating_first() -> None:
    """The feature whose class means differ most must be listed first.

    ``f_separating`` is centred at 0, 5 and 10 for classes ``a``, ``b`` and
    ``c`` respectively (with small Gaussian jitter), while ``f_useless`` is
    standard-normal noise drawn independently of the class label.
    """
    rng = np.random.default_rng(0)
    n = 200
    labels = rng.choice(["a", "b", "c"], size=n)

    # Per-class centre for the separating feature.
    centres = {"a": 0, "b": 5, "c": 10}
    base = np.array([centres[label] for label in labels])

    # Draw order (jitter first, then noise) is kept for reproducibility.
    separating = base + rng.normal(scale=0.5, size=n)
    useless = rng.normal(size=n)
    frame = pd.DataFrame({
        "f_separating": separating,
        "f_useless": useless,
        "target": labels,
    })

    ranked = class_mean_features(
        frame, ["f_separating", "f_useless"], "target", top_features=2
    )
    assert len(ranked) == 2

    first, second = ranked[0], ranked[1]
    assert first["feature"] == "f_separating"
    assert second["feature"] == "f_useless"

    # Class means should differ substantially for f_separating
    class_means = first["means"]
    spread = max(class_means.values()) - min(class_means.values())
    assert spread > 5