# SignalMod / tests/test_stable_training.py
# Mirae Kang
# feat: implement new models and improve UI, #23
# 46cc63a
# raw
# history blame
# 2.67 kB
"""Unit tests for stable training helpers (no full fine-tune)."""
import numpy as np
import pytest
from src.models.hybrid_ensemble import fit_lr_with_gap_control, soft_vote_probs
from src.models.transformer_trainer import freeze_distilbert_partial
def test_fit_lr_with_gap_control_picks_regularized_c():
    """Smoke-test ``fit_lr_with_gap_control`` on a tiny synthetic corpus.

    Builds 120 short documents with random binary labels, trains on the
    first 100 and holds out the last 20, then checks that the helper
    returns a model reporting ``is_fitted`` and a metadata mapping that
    contains a ``"C"`` entry.

    NOTE(review): despite the test name, the value of the selected ``C`` is
    not asserted here -- only its presence in the metadata.
    """
    import pandas as pd

    n_docs = 120
    n_train = 100

    # Seeded generator keeps the random labels reproducible across runs.
    label_rng = np.random.default_rng(0)
    texts = pd.Series([f"{'word ' * (idx % 5)}{idx}" for idx in range(n_docs)])
    labels = pd.Series(label_rng.integers(0, 2, size=n_docs))

    train_texts, test_texts = texts.iloc[:n_train], texts.iloc[n_train:]
    train_labels, test_labels = labels.iloc[:n_train], labels.iloc[n_train:]

    # Two candidate settings for the gap search: the baseline and a more
    # heavily regularized / smaller-vocabulary alternative.
    gap_search_cfg = {
        "enabled": True,
        "param_grid": [
            {"C": 0.05, "max_features": 200},
            {"C": 0.001, "max_features": 50},
        ],
    }
    lr_cfg = {
        "C": 0.05,
        "max_iter": 500,
        "class_weight": "balanced",
        "solver": "lbfgs",
        "gap_search": gap_search_cfg,
    }
    tfidf_cfg = {"max_features": 200, "ngram_range": [1, 1], "min_df": 1}

    fitted_model, fit_meta = fit_lr_with_gap_control(
        train_texts,
        train_labels,
        test_texts,
        test_labels,
        lr_cfg,
        tfidf_cfg,
        max_gap=0.05,
    )

    assert fitted_model.is_fitted
    assert "C" in fit_meta
def test_soft_vote_equal_weights():
    """Equal 0.5/0.5 weights should yield the element-wise mean of the inputs."""
    probs_first = np.array([0.8, 0.2])
    probs_second = np.array([0.4, 0.6])

    blended = soft_vote_probs(probs_first, probs_second, 0.5, 0.5)

    # (0.8 + 0.4) / 2 = 0.6 and (0.2 + 0.6) / 2 = 0.4
    expected = [0.6, 0.4]
    np.testing.assert_allclose(blended, expected)
def test_deduplicate_by_cosine_drops_near_duplicates(monkeypatch):
    """Check that ``deduplicate_by_cosine`` drops a candidate close to a reference.

    ``SentenceTransformer`` is replaced with a fake encoder that returns
    fixed 2-D embeddings, so no real model is loaded. With a threshold of
    0.95 the first candidate (almost parallel to the reference embedding)
    is expected to be removed and the second (orthogonal) one kept, along
    with its label.

    Args:
        monkeypatch: pytest fixture used to swap in the fake encoder; the
            patch is undone automatically when the test finishes.
    """
    # ``monkeypatch.setattr`` with a dotted string imports the target module,
    # so without this guard a missing optional dependency surfaces as an
    # ImportError instead of a skip (mirrors the ``transformers`` guard used
    # by the DistilBERT test in this module).
    pytest.importorskip("sentence_transformers")

    from src.features import augmentation as aug

    class FakeModel:
        """Minimal stand-in exposing only the ``encode`` method."""

        def encode(self, texts, **kwargs):
            # One text -> the embedding used for the single reference;
            # otherwise -> the embeddings for the two candidates.
            # NOTE(review): assumes references and candidates are encoded in
            # separate calls -- confirm against ``deduplicate_by_cosine``.
            if len(texts) == 1:
                return np.array([[1.0, 0.0]])
            return np.array([[0.99, 0.01], [0.0, 1.0]])

    monkeypatch.setattr(
        "sentence_transformers.SentenceTransformer",
        lambda *_a, **_k: FakeModel(),
    )

    kept_t, kept_l = aug.deduplicate_by_cosine(
        ["near dup", "different"],
        [1, 1],
        ["ref"],
        threshold=0.95,
    )

    assert kept_t == ["different"]
    assert kept_l == [1]
def test_partial_freeze_distilbert():
    """Verify ``freeze_distilbert_partial`` freezes only the first N layers.

    Loads ``distilbert-base-uncased`` with a two-label classification head,
    freezes the first four transformer layers, and asserts that those
    layers have no trainable parameters while the remaining layers, the
    pre-classifier and the classifier stay trainable. Skipped when
    ``transformers`` is not installed.

    NOTE(review): ``from_pretrained`` presumably needs the checkpoint to be
    cached locally or downloadable -- confirm for offline CI.
    """
    pytest.importorskip("transformers")
    from transformers import AutoModelForSequenceClassification

    n_frozen = 4
    model = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",
        num_labels=2,
    )
    freeze_distilbert_partial(model, freeze_first_n=n_frozen)

    for idx, block in enumerate(model.distilbert.transformer.layer):
        # A layer counts as trainable if any of its parameters tracks gradients.
        trainable = any(param.requires_grad for param in block.parameters())
        if idx < n_frozen:
            assert not trainable, f"layer {idx} should be frozen"
        else:
            assert trainable, f"layer {idx} should be trainable"

    # The classification head must remain trainable after partial freezing.
    assert model.pre_classifier.weight.requires_grad
    assert model.classifier.weight.requires_grad