File size: 5,303 Bytes
2216aae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
"""
Utilities for generating random numbers, random sequences, and
random selections.
"""
import networkx as nx
from networkx.utils import py_random_state
__all__ = [
"powerlaw_sequence",
"is_valid_tree_degree_sequence",
"zipf_rv",
"cumulative_distribution",
"discrete_sequence",
"random_weighted_sample",
"weighted_choice",
]
# The same helpers for choosing random sequences from distributions
# uses Python's random module
# https://docs.python.org/3/library/random.html
@py_random_state(2)
def powerlaw_sequence(n, exponent=2.0, seed=None):
"""
Return sample sequence of length n from a power law distribution.
"""
return [seed.paretovariate(exponent - 1) for i in range(n)]
def is_valid_tree_degree_sequence(degree_sequence):
"""Check if a degree sequence is valid for a tree.
Two conditions must be met for a degree sequence to be valid for a tree:
1. The number of nodes must be one more than the number of edges.
2. The degree sequence must be trivial or have only strictly positive
node degrees.
Parameters
----------
degree_sequence : iterable
Iterable of node degrees.
Returns
-------
bool
Whether the degree sequence is valid for a tree.
str
Reason for invalidity, or dummy string if valid.
"""
seq = list(degree_sequence)
number_of_nodes = len(seq)
twice_number_of_edges = sum(seq)
if 2 * number_of_nodes - twice_number_of_edges != 2:
return False, "tree must have one more node than number of edges"
elif seq != [0] and any(d <= 0 for d in seq):
return False, "nontrivial tree must have strictly positive node degrees"
return True, ""
@py_random_state(2)
def zipf_rv(alpha, xmin=1, seed=None):
r"""Returns a random value chosen from the Zipf distribution.
The return value is an integer drawn from the probability distribution
.. math::
p(x)=\frac{x^{-\alpha}}{\zeta(\alpha, x_{\min})},
where $\zeta(\alpha, x_{\min})$ is the Hurwitz zeta function.
Parameters
----------
alpha : float
Exponent value of the distribution
xmin : int
Minimum value
seed : integer, random_state, or None (default)
Indicator of random number generation state.
See :ref:`Randomness<randomness>`.
Returns
-------
x : int
Random value from Zipf distribution
Raises
------
ValueError:
If xmin < 1 or
If alpha <= 1
Notes
-----
The rejection algorithm generates random values for a the power-law
distribution in uniformly bounded expected time dependent on
parameters. See [1]_ for details on its operation.
Examples
--------
>>> nx.utils.zipf_rv(alpha=2, xmin=3, seed=42)
8
References
----------
.. [1] Luc Devroye, Non-Uniform Random Variate Generation,
Springer-Verlag, New York, 1986.
"""
if xmin < 1:
raise ValueError("xmin < 1")
if alpha <= 1:
raise ValueError("a <= 1.0")
a1 = alpha - 1.0
b = 2**a1
while True:
u = 1.0 - seed.random() # u in (0,1]
v = seed.random() # v in [0,1)
x = int(xmin * u ** -(1.0 / a1))
t = (1.0 + (1.0 / x)) ** a1
if v * x * (t - 1.0) / (b - 1.0) <= t / b:
break
return x
def cumulative_distribution(distribution):
"""Returns normalized cumulative distribution from discrete distribution."""
cdf = [0.0]
cumulative = 0.0
for element in distribution:
cumulative += element
cdf.append(cumulative)
return [element / cumulative for element in cdf]
@py_random_state(3)
def discrete_sequence(n, distribution=None, cdistribution=None, seed=None):
"""
Return sample sequence of length n from a given discrete distribution
or discrete cumulative distribution.
One of the following must be specified.
distribution = histogram of values, will be normalized
cdistribution = normalized discrete cumulative distribution
"""
import bisect
if cdistribution is not None:
cdf = cdistribution
elif distribution is not None:
cdf = cumulative_distribution(distribution)
else:
raise nx.NetworkXError(
"discrete_sequence: distribution or cdistribution missing"
)
# get a uniform random number
inputseq = [seed.random() for i in range(n)]
# choose from CDF
seq = [bisect.bisect_left(cdf, s) - 1 for s in inputseq]
return seq
@py_random_state(2)
def random_weighted_sample(mapping, k, seed=None):
"""Returns k items without replacement from a weighted sample.
The input is a dictionary of items with weights as values.
"""
if k > len(mapping):
raise ValueError("sample larger than population")
sample = set()
while len(sample) < k:
sample.add(weighted_choice(mapping, seed))
return list(sample)
@py_random_state(1)
def weighted_choice(mapping, seed=None):
"""Returns a single element from a weighted sample.
The input is a dictionary of items with weights as values.
"""
# use roulette method
rnd = seed.random() * sum(mapping.values())
for k, w in mapping.items():
rnd -= w
if rnd < 0:
return k
|