File size: 5,303 Bytes

2216aae

"""
Utilities for generating random numbers, random sequences, and
random selections.
"""

import networkx as nx
from networkx.utils import py_random_state

__all__ = [
    "powerlaw_sequence",
    "is_valid_tree_degree_sequence",
    "zipf_rv",
    "cumulative_distribution",
    "discrete_sequence",
    "random_weighted_sample",
    "weighted_choice",
]


# The same helpers for choosing random sequences from distributions
# uses Python's random module
# https://docs.python.org/3/library/random.html


@py_random_state(2)
def powerlaw_sequence(n, exponent=2.0, seed=None):
    """
    Return sample sequence of length n from a power law distribution.
    """
    return [seed.paretovariate(exponent - 1) for i in range(n)]


def is_valid_tree_degree_sequence(degree_sequence):
    """Check if a degree sequence is valid for a tree.

    Two conditions must be met for a degree sequence to be valid for a tree:

    1. The number of nodes must be one more than the number of edges.
    2. The degree sequence must be trivial or have only strictly positive
       node degrees.

    Parameters
    ----------
    degree_sequence : iterable
        Iterable of node degrees.

    Returns
    -------
    bool
        Whether the degree sequence is valid for a tree.
    str
        Reason for invalidity, or dummy string if valid.
    """
    seq = list(degree_sequence)
    number_of_nodes = len(seq)
    twice_number_of_edges = sum(seq)

    if 2 * number_of_nodes - twice_number_of_edges != 2:
        return False, "tree must have one more node than number of edges"
    elif seq != [0] and any(d <= 0 for d in seq):
        return False, "nontrivial tree must have strictly positive node degrees"
    return True, ""


@py_random_state(2)
def zipf_rv(alpha, xmin=1, seed=None):
    r"""Returns a random value chosen from the Zipf distribution.

    The return value is an integer drawn from the probability distribution

    .. math::

        p(x)=\frac{x^{-\alpha}}{\zeta(\alpha, x_{\min})},

    where $\zeta(\alpha, x_{\min})$ is the Hurwitz zeta function.

    Parameters
    ----------
    alpha : float
      Exponent value of the distribution
    xmin : int
      Minimum value
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    x : int
      Random value from Zipf distribution

    Raises
    ------
    ValueError:
      If xmin < 1 or
      If alpha <= 1

    Notes
    -----
    The rejection algorithm generates random values for a the power-law
    distribution in uniformly bounded expected time dependent on
    parameters.  See [1]_ for details on its operation.

    Examples
    --------
    >>> nx.utils.zipf_rv(alpha=2, xmin=3, seed=42)
    8

    References
    ----------
    .. [1] Luc Devroye, Non-Uniform Random Variate Generation,
       Springer-Verlag, New York, 1986.
    """
    if xmin < 1:
        raise ValueError("xmin < 1")
    if alpha <= 1:
        raise ValueError("a <= 1.0")
    a1 = alpha - 1.0
    b = 2**a1
    while True:
        u = 1.0 - seed.random()  # u in (0,1]
        v = seed.random()  # v in [0,1)
        x = int(xmin * u ** -(1.0 / a1))
        t = (1.0 + (1.0 / x)) ** a1
        if v * x * (t - 1.0) / (b - 1.0) <= t / b:
            break
    return x


def cumulative_distribution(distribution):
    """Returns normalized cumulative distribution from discrete distribution."""

    cdf = [0.0]
    cumulative = 0.0
    for element in distribution:
        cumulative += element
        cdf.append(cumulative)
    return [element / cumulative for element in cdf]


@py_random_state(3)
def discrete_sequence(n, distribution=None, cdistribution=None, seed=None):
    """
    Return sample sequence of length n from a given discrete distribution
    or discrete cumulative distribution.

    One of the following must be specified.

    distribution = histogram of values, will be normalized

    cdistribution = normalized discrete cumulative distribution

    """
    import bisect

    if cdistribution is not None:
        cdf = cdistribution
    elif distribution is not None:
        cdf = cumulative_distribution(distribution)
    else:
        raise nx.NetworkXError(
            "discrete_sequence: distribution or cdistribution missing"
        )

    # get a uniform random number
    inputseq = [seed.random() for i in range(n)]

    # choose from CDF
    seq = [bisect.bisect_left(cdf, s) - 1 for s in inputseq]
    return seq


@py_random_state(2)
def random_weighted_sample(mapping, k, seed=None):
    """Returns k items without replacement from a weighted sample.

    The input is a dictionary of items with weights as values.
    """
    if k > len(mapping):
        raise ValueError("sample larger than population")
    sample = set()
    while len(sample) < k:
        sample.add(weighted_choice(mapping, seed))
    return list(sample)


@py_random_state(1)
def weighted_choice(mapping, seed=None):
    """Returns a single element from a weighted sample.

    The input is a dictionary of items with weights as values.
    """
    # use roulette method
    rnd = seed.random() * sum(mapping.values())
    for k, w in mapping.items():
        rnd -= w
        if rnd < 0:
            return k