"""
KubeSimEnv Utils - Probability helpers and simulation utilities.

Random failure rate generators, latency simulators, and resource usage curves.
Makes the simulation feel realistic and non-deterministic in the right ways.
"""
import numpy as np
import math
from typing import Dict, List, Any, Optional
from datetime import datetime
# Generator shared by the helpers in this module; set_random_seed() swaps it
# for a seeded one when reproducible runs are needed.
_RNG = np.random.default_rng()


def set_random_seed(seed: Optional[int]) -> None:
    """Replace the module-wide random generator with one built from ``seed``.

    Args:
        seed: Value handed to ``np.random.default_rng``. Passing the same
            integer again yields the same sequence of draws; ``None`` lets
            NumPy pick its own entropy.
    """
    global _RNG
    fresh_generator = np.random.default_rng(seed)
    _RNG = fresh_generator
class ProbabilityHelpers:
    """Stateless helpers for generating probabilities and simple distributions.

    Every method is a ``staticmethod``; the ones that draw random numbers use
    the module-level ``_RNG`` generator.
    """

    @staticmethod
    def weighted_random_choice(choices: List[Any], weights: List[float]) -> Any:
        """Pick one element of ``choices``, biased by ``weights``.

        The pick is made by index, so the returned object is the item stored
        in ``choices`` itself rather than a NumPy-coerced copy (mixed-type
        lists such as ``["a", 1]`` keep their element types).

        Args:
            choices: Candidate items.
            weights: Relative weight of each candidate, parallel to
                ``choices``. They are normalized to sum to 1.

        Returns:
            ``None`` when ``choices`` is empty. A uniformly chosen item when
            ``weights`` is empty, has a different length than ``choices``, or
            sums to zero. Otherwise a weighted pick.

        Raises:
            ValueError: Propagated from NumPy when the normalized weights are
                not valid probabilities (for example a negative weight).
        """
        if not choices:
            return None

        count = len(choices)
        if not weights or len(weights) != count:
            return choices[int(_RNG.choice(count))]

        total_weight = sum(weights)
        if total_weight == 0:
            # Nothing to bias the draw with; fall back to a uniform pick.
            return choices[int(_RNG.choice(count))]

        probabilities = [w / total_weight for w in weights]
        return choices[int(_RNG.choice(count, p=probabilities))]

    @staticmethod
    def exponential_backoff(attempt: int, base_delay: float = 1.0, max_delay: float = 60.0) -> float:
        """Return the delay ``base_delay * 2**attempt`` capped at ``max_delay``.

        Args:
            attempt: Zero-based retry counter.
            base_delay: Delay used for attempt 0.
            max_delay: Upper bound on the returned delay.

        Returns:
            The capped delay. Very large ``attempt`` values, whose power of
            two no longer fits in a float, yield ``max_delay`` instead of
            raising ``OverflowError``.
        """
        try:
            delay = base_delay * (2 ** attempt)
        except OverflowError:
            # 2**attempt is too large for a float. With a positive base the
            # true delay is far beyond any cap; a non-positive base can never
            # grow past its own value.
            return max_delay if base_delay > 0 else min(base_delay, max_delay)
        return min(delay, max_delay)

    @staticmethod
    def poisson_arrival_rate(lambda_rate: float, time_window: float) -> int:
        """Draw the number of events occurring in ``time_window``.

        The expected count is ``lambda_rate * time_window``; a negative
        product is clamped to 0 before sampling from the Poisson distribution.
        """
        return int(_RNG.poisson(max(lambda_rate * time_window, 0)))

    @staticmethod
    def failure_probability_over_time(base_rate: float, time_elapsed: float,
                                      max_rate: float = 1.0) -> float:
        """Return a failure probability that grows logarithmically with time.

        Computes ``base_rate * (1 + ln(1 + time_elapsed))`` capped at
        ``max_rate``.

        Args:
            base_rate: Probability at ``time_elapsed == 0``.
            time_elapsed: Elapsed time; negative values are treated as 0 so
                the logarithm never leaves its domain.
            max_rate: Upper bound on the returned probability.
        """
        elapsed = max(0.0, time_elapsed)
        probability = base_rate * (1 + math.log1p(elapsed))
        return min(probability, max_rate)

    @staticmethod
    def random_failure_rate(min_rate: float = 0.1, max_rate: float = 0.9) -> float:
        """Draw a failure rate uniformly between ``min_rate`` and ``max_rate``."""
        return float(_RNG.uniform(min_rate, max_rate))
class LatencySimulator:
    """Produces latency samples from a base value, a load factor and jitter."""

    def __init__(self, base_latency_ms: float = 50.0):
        # A load factor of 1.0 means normal load (no extra latency).
        self.load_factor = 1.0
        self.base_latency_ms = base_latency_ms

    def set_load(self, load_factor: float):
        """Store the system load factor, never letting it drop below 0.1.

        1.0 is normal load; values above 1.0 mean the system is overloaded.
        """
        self.load_factor = load_factor if load_factor > 0.1 else 0.1

    def get_latency(self) -> float:
        """Return one latency sample in milliseconds (at least 1.0).

        The sample is the base latency, plus a penalty that only applies
        when the load factor exceeds 1.0, plus Gaussian jitter whose standard
        deviation is 10% of the base latency.
        """
        base = self.base_latency_ms
        overload_penalty = max(0, base * (self.load_factor - 1.0) * 2)
        noise = float(_RNG.normal(0, base * 0.1))
        return max(1.0, base + overload_penalty + noise)

    def get_latency_with_spike(self, spike_probability: float = 0.05,
                               spike_multiplier: float = 5.0) -> float:
        """Return a latency sample that is occasionally multiplied by a spike.

        With probability ``spike_probability`` the sample from
        ``get_latency`` is scaled by ``spike_multiplier``.
        """
        sample = self.get_latency()
        spiked = float(_RNG.random()) < spike_probability
        return sample * spike_multiplier if spiked else sample
class ResourceUsageSimulator:
    """Simulates CPU, memory and disk usage patterns."""

    def __init__(self):
        # Random phase in [0, 2*pi). It is stored on the instance but none of
        # the methods in this class read it.
        self.time_offset = float(_RNG.uniform(0, 2 * math.pi))

    def get_cpu_usage(self, base_usage: float = 0.3,
                      variation: float = 0.2) -> float:
        """Return CPU usage as a percentage in the range 0-100.

        Usage follows a 24-hour sinusoidal cycle derived from the current
        wall-clock timestamp, scaled by ``variation`` and added to
        ``base_usage``, plus Gaussian noise (standard deviation 0.05).

        Args:
            base_usage: Baseline usage as a fraction (0-1).
            variation: Amplitude of the daily cycle as a fraction (0-1).
        """
        hour_of_day = (datetime.now().timestamp() / 3600) % 24
        # Map the sine wave from [-1, 1] onto [0, 1].
        daily_pattern = 0.5 * math.sin(2 * math.pi * hour_of_day / 24) + 0.5
        usage = base_usage + variation * daily_pattern
        usage += float(_RNG.normal(0, 0.05))  # Noise
        return max(0.0, min(1.0, usage)) * 100  # Clamp to 0-100%

    def get_memory_usage(self, base_usage: float = 0.4,
                         variation: float = 0.15) -> float:
        """Return memory usage as a percentage in the range 0-100.

        Simulates a slow leak: usage creeps up by as much as 0.1 (10
        percentage points) over a 7-day cycle and then resets, with Gaussian
        noise (standard deviation 0.03) on top.

        Args:
            base_usage: Baseline usage as a fraction (0-1).
            variation: Accepted for interface compatibility; the current
                model does not use it.
        """
        days_since_epoch = datetime.now().timestamp() / 86400
        # Fraction of the current 7-day cycle that has elapsed, in [0, 1).
        # Dividing by 7 spreads the leak across the whole week instead of
        # saturating after the first day.
        week_progress = (days_since_epoch % 7) / 7.0
        leak_factor = 0.1 * week_progress
        usage = base_usage + leak_factor
        usage += float(_RNG.normal(0, 0.03))  # Noise
        return max(0.0, min(1.0, usage)) * 100  # Clamp to 0-100%

    def get_resource_curve(self, resource_type: str,
                           time_elapsed: float) -> float:
        """Return usage (as a fraction) for a resource after ``time_elapsed``.

        Args:
            resource_type: ``"cpu"`` (periodic with random bursts),
                ``"memory"`` (gradual rise with a 1% chance of a 0.3 drop per
                call), or ``"disk"`` (linear growth from 0.1 to 0.9 over
                10000 time units). Any other value returns 0.5.
            time_elapsed: Elapsed time; negative values are treated as 0.

        Returns:
            A non-negative usage fraction.
        """
        elapsed = max(0.0, time_elapsed)

        if resource_type == "cpu":
            usage = 0.3 + 0.4 * math.sin(elapsed / 100) + 0.2 * float(_RNG.random())
            # The sine trough can push the raw value slightly below zero.
            return max(0.0, usage)
        elif resource_type == "memory":
            base = 0.2 + 0.6 * (1 - math.exp(-elapsed / 1000))
            gc_drop = 0.3 if float(_RNG.random()) < 0.01 else 0  # Occasional GC
            return max(0.0, base - gc_drop)
        elif resource_type == "disk":
            return 0.1 + 0.8 * min(elapsed / 10000, 1.0)
        else:
            return 0.5
class NetworkSimulator:
    """Models partitions, latency and bandwidth of a simulated network."""

    def __init__(self):
        self.bandwidth_mbps = 1000.0
        self.latency_ms = 10.0
        self.partition_probability = 0.01

    def simulate_partition(self) -> bool:
        """Return True when this call simulates a network partition.

        Each call is an independent draw that succeeds with probability
        ``partition_probability``.
        """
        roll = float(_RNG.random())
        return roll < self.partition_probability

    def get_latency(self) -> float:
        """Return one network latency sample in milliseconds (at least 1.0).

        The sample is ``latency_ms`` plus Gaussian noise (standard deviation
        20% of ``latency_ms``); 5% of the time it is further multiplied by a
        factor drawn uniformly between 2 and 10.
        """
        base = self.latency_ms
        sample = base + float(_RNG.normal(0, base * 0.2))
        spike_roll = float(_RNG.random())
        if spike_roll < 0.05:
            sample = sample * float(_RNG.uniform(2, 10))
        return max(1.0, sample)

    def get_bandwidth(self) -> float:
        """Return the currently available bandwidth in Mbps.

        ``bandwidth_mbps`` is scaled by a usage factor drawn from 0.3-0.9
        and a condition factor drawn from 0.8-1.2.
        """
        usage_factor = float(_RNG.uniform(0.3, 0.9))
        condition_factor = float(_RNG.uniform(0.8, 1.2))
        scaled_by_usage = self.bandwidth_mbps * usage_factor
        return scaled_by_usage * condition_factor
def generate_failure_scenario(config: Dict[str, Any]) -> Dict[str, Any]:
    """Build a randomly drawn failure scenario.

    Args:
        config: Settings consulted for the type-specific rate. Keys read are
            ``crash_loop_failure_rate``, ``oom_kill_failure_rate``,
            ``node_failure_rate`` and ``cascade_failure_probability``; each
            has a built-in default when missing.

    Returns:
        A dict with ``type``, ``severity`` (0.3-0.9), ``duration`` in seconds
        (30-300), an empty ``affected_components`` list, and either
        ``failure_rate`` or, for cascades, ``probability``.
    """
    # scenario type -> (output field, config key, default value)
    type_specific = {
        "crashloop": ("failure_rate", "crash_loop_failure_rate", 0.7),
        "oom": ("failure_rate", "oom_kill_failure_rate", 0.6),
        "node_failure": ("failure_rate", "node_failure_rate", 0.4),
        "cascade": ("probability", "cascade_failure_probability", 0.5),
    }

    # Draw order (type, severity, duration) is kept fixed so seeded runs
    # remain reproducible.
    failure_type = str(_RNG.choice(["crashloop", "oom", "node_failure", "cascade"]))
    severity = float(_RNG.uniform(0.3, 0.9))
    duration_seconds = int(_RNG.integers(30, 301))

    scenario = {
        "type": failure_type,
        "severity": severity,
        "duration": duration_seconds,
        "affected_components": [],
    }

    extra = type_specific.get(failure_type)
    if extra is not None:
        field, config_key, fallback = extra
        scenario[field] = config.get(config_key, fallback)
    return scenario
def apply_realistic_noise(value: float, noise_percent: float = 10.0) -> float:
    """Return ``value`` perturbed by Gaussian noise, never below zero.

    Args:
        value: The quantity to perturb.
        noise_percent: Standard deviation of the noise, expressed as a
            percentage of ``value``.

    Returns:
        ``value`` plus the sampled noise, clamped to a minimum of 0.0.
    """
    # The standard deviation must be non-negative; taking the magnitude keeps
    # a negative value or percentage from making the sampler raise.
    scale = abs(value * (noise_percent / 100.0))
    noise = float(_RNG.normal(0, scale))
    return max(0.0, value + noise)