"""Unit tests for stable training helpers (no full fine-tune).""" import numpy as np import pytest from src.models.hybrid_ensemble import fit_lr_with_gap_control, soft_vote_probs from src.models.transformer_trainer import freeze_distilbert_partial def test_fit_lr_with_gap_control_picks_regularized_c(): import pandas as pd rng = np.random.default_rng(0) n = 120 X = pd.Series(["word " * (i % 5) + str(i) for i in range(n)]) y = pd.Series(rng.integers(0, 2, size=n)) X_tr, X_te = X.iloc[:100], X.iloc[100:] y_tr, y_te = y.iloc[:100], y.iloc[100:] lr_cfg = { "C": 0.05, "max_iter": 500, "class_weight": "balanced", "solver": "lbfgs", "gap_search": { "enabled": True, "param_grid": [{"C": 0.05, "max_features": 200}, {"C": 0.001, "max_features": 50}], }, } tfidf_cfg = {"max_features": 200, "ngram_range": [1, 1], "min_df": 1} model, meta = fit_lr_with_gap_control(X_tr, y_tr, X_te, y_te, lr_cfg, tfidf_cfg, max_gap=0.05) assert model.is_fitted assert "C" in meta def test_soft_vote_equal_weights(): a = np.array([0.8, 0.2]) b = np.array([0.4, 0.6]) out = soft_vote_probs(a, b, 0.5, 0.5) np.testing.assert_allclose(out, [0.6, 0.4]) def test_deduplicate_by_cosine_drops_near_duplicates(monkeypatch): from src.features import augmentation as aug class FakeModel: def encode(self, texts, **kwargs): if len(texts) == 1: return np.array([[1.0, 0.0]]) return np.array([[0.99, 0.01], [0.0, 1.0]]) monkeypatch.setattr( "sentence_transformers.SentenceTransformer", lambda *_a, **_k: FakeModel(), ) kept_t, kept_l = aug.deduplicate_by_cosine( ["near dup", "different"], [1, 1], ["ref"], threshold=0.95, ) assert kept_t == ["different"] assert kept_l == [1] def test_partial_freeze_distilbert(): pytest.importorskip("transformers") from transformers import AutoModelForSequenceClassification model = AutoModelForSequenceClassification.from_pretrained( "distilbert-base-uncased", num_labels=2, ) freeze_distilbert_partial(model, freeze_first_n=4) layers = list(model.distilbert.transformer.layer) for i, layer in enumerate(layers): frozen = not any(p.requires_grad for p in layer.parameters()) if i < 4: assert frozen, f"layer {i} should be frozen" else: assert not frozen, f"layer {i} should be trainable" assert model.pre_classifier.weight.requires_grad assert model.classifier.weight.requires_grad