File size: 1,544 Bytes
3a2e5f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""Reproducibility helpers.

Why this matters: the IEEE notebook's ``random.shuffle`` of image keys (cell 11)
is non-deterministic without a seed, which means the same code can produce a
different train/val split on every run — and therefore different BLEU. Pinning
the seed makes results reproducible across machines and dates.
"""

from __future__ import annotations

import os
import random
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # pragma: no cover
    pass


def set_global_seed(seed: int) -> None:
    """Seed Python, NumPy, and (when installed) TensorFlow RNGs from one integer.

    TF's seeding has multiple layers (``tf.random.set_seed`` for graph-level,
    ``os.environ['PYTHONHASHSEED']`` for hash randomisation, and op-level seeds
    for individual ops). We set as many as practical without forcing TF's
    deterministic mode (which can hurt training throughput by ~15%).

    The seed is validated in full *before* any global state is touched, so a
    rejected seed never leaves the process half-seeded (e.g. ``random`` and
    ``PYTHONHASHSEED`` updated but NumPy not).

    Args:
        seed: Integer in ``[0, 2**32 - 1]``. The upper bound is the largest
            value accepted by both ``np.random.seed`` and ``PYTHONHASHSEED``.

    Raises:
        ValueError: If ``seed`` is negative or greater than ``2**32 - 1``.
    """
    max_seed = 2**32 - 1

    if seed < 0:
        raise ValueError(f"seed must be non-negative, got {seed}")
    if seed > max_seed:
        # Without this check np.random.seed() would raise further down, after
        # PYTHONHASHSEED and random's state had already been overwritten.
        raise ValueError(f"seed must be at most {max_seed} (2**32 - 1), got {seed}")

    # Hash randomisation is fixed at interpreter start-up, so this assignment
    # cannot change the running process; it only takes effect in child
    # processes that inherit the environment after this call.
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)

    # Imported lazily so the utils package doesn't pull NumPy at import time
    # for unrelated callers (e.g. config validation).
    import numpy as np

    np.random.seed(seed)

    try:
        import tensorflow as tf

        tf.random.set_seed(seed)
        tf.keras.utils.set_random_seed(seed)
    except ImportError:  # pragma: no cover
        # TF is an optional dep at the *utility* layer; ML callers always have it.
        pass