Spaces:
Configuration error
Configuration error
| """Tests for the opt-in training-stability primitives. | |
| Covers: | |
| * ``label_smoothed_crossentropy`` returns a per-token loss tensor with the | |
| same shape as the baseline sparse loss, and reduces to it at smoothing=0. | |
| * ``WarmupCosineDecay`` produces the expected piecewise schedule. | |
| * ``build_loss`` / ``build_learning_rate`` dispatch correctly on config. | |
| """ | |
| from __future__ import annotations | |
| from itertools import pairwise | |
| import numpy as np | |
| import pytest | |
| from captioning.training.losses import build_loss, label_smoothed_crossentropy | |
| from captioning.training.schedules import WarmupCosineDecay, build_learning_rate | |
| # ---- Label smoothing ------------------------------------------------------- | |
def test_label_smoothed_loss_returns_per_token_shape() -> None:
    """Check the smoothed loss yields one value per token of a (1, 3) batch.

    Also checks that the loss at positions 0 and 1 stays below 1.0 when the
    prediction puts its largest value on the target token.
    """
    import tensorflow as tf

    vocab_size = 5
    # One sequence of three target ids.
    targets = tf.constant([[1, 2, 0]], dtype=tf.int32)
    # Each row places its largest value (0.85) on the index named by the
    # matching entry of ``targets``.
    predictions = tf.constant(
        [
            [
                [0.05, 0.85, 0.05, 0.025, 0.025],
                [0.05, 0.05, 0.85, 0.025, 0.025],
                [0.85, 0.05, 0.05, 0.025, 0.025],
            ]
        ],
        dtype=tf.float32,
    )
    smoothed_loss = label_smoothed_crossentropy(0.1, vocab_size)

    per_token = smoothed_loss(targets, predictions).numpy()

    assert per_token.shape == (1, 3)
    # The first two tokens are confidently correct, so their loss must be low.
    for position in (0, 1):
        assert per_token[0, position] < 1.0
def test_label_smoothing_with_zero_returns_baseline_loss() -> None:
    """Check ``build_loss`` with smoothing 0.0 returns the baseline Keras loss."""
    import tensorflow as tf

    baseline_cls = tf.keras.losses.SparseCategoricalCrossentropy
    built = build_loss(0.0, vocab_size=10)
    # The baseline is a loss *instance* rather than a plain function, so the
    # check is on its type.
    assert isinstance(built, baseline_cls)
def test_label_smoothing_is_higher_than_unsmoothed_on_perfect_prediction() -> None:
    """Check smoothing penalises an overconfident, perfectly correct prediction.

    A one-hot prediction on the true token must get a per-token loss that
    exceeds the unsmoothed value (0) by more than 1e-3 when smoothing is 0.1.
    """
    import tensorflow as tf

    vocab_size = 5
    target = tf.constant([[1]], dtype=tf.int32)
    perfect_prediction = tf.constant([[[0.0, 1.0, 0.0, 0.0, 0.0]]], dtype=tf.float32)

    smoothed_loss_fn = label_smoothed_crossentropy(0.1, vocab_size)
    smoothed_loss = smoothed_loss_fn(target, perfect_prediction).numpy()

    # Sparse cross-entropy when the true class has probability 1 is -log(1) == 0.
    baseline_loss = -np.log(1.0)
    margin = 1e-3
    assert smoothed_loss[0, 0] > baseline_loss + margin
| # ---- Learning-rate schedule ----------------------------------------------- | |
def test_warmup_cosine_zero_at_step_zero() -> None:
    """Check the schedule evaluates to (approximately) 0.0 at step 0."""
    import tensorflow as tf

    warmup_cosine = WarmupCosineDecay(
        peak_learning_rate=1.0,
        warmup_steps=10,
        decay_steps=100,
    )
    first_step = tf.constant(0, dtype=tf.int64)
    rate = float(warmup_cosine(first_step))
    assert rate == pytest.approx(0.0)
def test_warmup_cosine_peaks_at_end_of_warmup() -> None:
    """Check the schedule reaches the peak rate (within 1e-3) at step 10."""
    import tensorflow as tf

    warmup_cosine = WarmupCosineDecay(
        peak_learning_rate=1.0,
        warmup_steps=10,
        decay_steps=100,
    )
    # Step 10 equals ``warmup_steps``.
    warmup_end = tf.constant(10, dtype=tf.int64)
    rate = float(warmup_cosine(warmup_end))
    assert rate == pytest.approx(1.0, abs=1e-3)
def test_warmup_cosine_floors_at_end_of_decay() -> None:
    """Check the schedule lands on ``min_learning_rate`` (within 1e-3) at step 110."""
    import tensorflow as tf

    warmup, decay = 10, 100
    floor = 0.1
    warmup_cosine = WarmupCosineDecay(
        peak_learning_rate=1.0,
        warmup_steps=warmup,
        decay_steps=decay,
        min_learning_rate=floor,
    )
    # 110 = warmup_steps + decay_steps, which this test treats as the end of decay.
    last_step = tf.constant(warmup + decay, dtype=tf.int64)
    rate_at_end = float(warmup_cosine(last_step))
    assert rate_at_end == pytest.approx(floor, abs=1e-3)
def test_warmup_cosine_is_monotone_during_warmup() -> None:
    """Check the rate never decreases from one step to the next over steps 0..10."""
    import tensorflow as tf

    warmup_cosine = WarmupCosineDecay(
        peak_learning_rate=1.0,
        warmup_steps=10,
        decay_steps=100,
    )
    steps = range(11)  # 0 through 10 inclusive, i.e. up to ``warmup_steps``
    rates = [float(warmup_cosine(tf.constant(step, dtype=tf.int64))) for step in steps]
    # Compare each rate with its successor.
    never_decreases = all(earlier <= later for earlier, later in pairwise(rates))
    assert never_decreases
def test_build_learning_rate_returns_float_for_constant() -> None:
    """Check the ``"constant"`` schedule hands back a value equal to the peak rate."""
    peak = 1e-3
    settings = {
        "schedule": "constant",
        "peak_learning_rate": peak,
        "warmup_steps": 0,
        "decay_steps": 10,
        "min_learning_rate": 0.0,
    }
    constant_rate = build_learning_rate(**settings)
    # Exact equality on purpose: the result is compared with the same literal passed in.
    assert constant_rate == peak
def test_build_learning_rate_returns_schedule_for_cosine() -> None:
    """Check the ``"cosine"`` schedule produces a ``WarmupCosineDecay`` instance."""
    settings = {
        "schedule": "cosine",
        "peak_learning_rate": 1e-3,
        "warmup_steps": 5,
        "decay_steps": 50,
        "min_learning_rate": 0.0,
    }
    built = build_learning_rate(**settings)
    assert isinstance(built, WarmupCosineDecay)
def test_build_learning_rate_rejects_unknown_schedule() -> None:
    """Check an unrecognised schedule name raises ``ValueError`` matching "unsupported"."""
    unknown_schedule_settings = {
        "schedule": "square_wave",
        "peak_learning_rate": 1.0,
        "warmup_steps": 0,
        "decay_steps": 10,
        "min_learning_rate": 0.0,
    }
    with pytest.raises(ValueError, match="unsupported"):
        build_learning_rate(**unknown_schedule_settings)