Spaces:
Running
on
Zero
Running
on
Zero
| """RNG imitiating torch cuda randn on CPU. You are welcome. | |
| Usage: | |
| ``` | |
| g = Generator(seed=0) | |
| print(g.randn(shape=(3, 4))) | |
| ``` | |
| Expected output: | |
| ``` | |
| [[-0.92466259 -0.42534415 -2.6438457 0.14518388] | |
| [-0.12086647 -0.57972564 -0.62285122 -0.32838709] | |
| [-1.07454231 -0.36314407 -1.67105067 2.26550497]] | |
| ``` | |
| """ | |
| import numpy as np | |
| philox_m = [0xD2511F53, 0xCD9E8D57] | |
| philox_w = [0x9E3779B9, 0xBB67AE85] | |
| two_pow32_inv = np.array([2.3283064e-10], dtype=np.float32) | |
| two_pow32_inv_2pi = np.array([2.3283064e-10 * 6.2831855], dtype=np.float32) | |
| def uint32(x): | |
| """Converts (N,) np.uint64 array into (2, N) np.unit32 array.""" | |
| return x.view(np.uint32).reshape(-1, 2).transpose(1, 0) | |
| def philox4_round(counter, key): | |
| """A single round of the Philox 4x32 random number generator.""" | |
| v1 = uint32(counter[0].astype(np.uint64) * philox_m[0]) | |
| v2 = uint32(counter[2].astype(np.uint64) * philox_m[1]) | |
| counter[0] = v2[1] ^ counter[1] ^ key[0] | |
| counter[1] = v2[0] | |
| counter[2] = v1[1] ^ counter[3] ^ key[1] | |
| counter[3] = v1[0] | |
| def philox4_32(counter, key, rounds=10): | |
| """Generates 32-bit random numbers using the Philox 4x32 random number generator. | |
| Parameters: | |
| counter (numpy.ndarray): A 4xN array of 32-bit integers representing the counter values (offset into generation). | |
| key (numpy.ndarray): A 2xN array of 32-bit integers representing the key values (seed). | |
| rounds (int): The number of rounds to perform. | |
| Returns: | |
| numpy.ndarray: A 4xN array of 32-bit integers containing the generated random numbers. | |
| """ | |
| for _ in range(rounds - 1): | |
| philox4_round(counter, key) | |
| key[0] = key[0] + philox_w[0] | |
| key[1] = key[1] + philox_w[1] | |
| philox4_round(counter, key) | |
| return counter | |
| def box_muller(x, y): | |
| """Returns just the first out of two numbers generated by Box–Muller transform algorithm.""" | |
| u = x * two_pow32_inv + two_pow32_inv / 2 | |
| v = y * two_pow32_inv_2pi + two_pow32_inv_2pi / 2 | |
| s = np.sqrt(-2.0 * np.log(u)) | |
| r1 = s * np.sin(v) | |
| return r1.astype(np.float32) | |
| class Generator: | |
| """RNG that produces same outputs as torch.randn(..., device='cuda') on CPU""" | |
| def __init__(self, seed): | |
| self.seed = seed | |
| self.offset = 0 | |
| def randn(self, shape): | |
| """Generate a sequence of n standard normal random variables using the Philox 4x32 random number generator and the Box-Muller transform.""" | |
| n = 1 | |
| for x in shape: | |
| n *= x | |
| counter = np.zeros((4, n), dtype=np.uint32) | |
| counter[0] = self.offset | |
| counter[2] = np.arange(n, dtype=np.uint32) # up to 2^32 numbers can be generated - if you want more you'd need to spill into counter[3] | |
| self.offset += 1 | |
| key = np.empty(n, dtype=np.uint64) | |
| key.fill(self.seed) | |
| key = uint32(key) | |
| g = philox4_32(counter, key) | |
| return box_muller(g[0], g[1]).reshape(shape) # discard g[2] and g[3] | |