Spaces:

SandyTheAdventurer
/

coenv

Sleeping

File size: 7,784 Bytes

05a686e

"""
KubeSimEnv Utils - Probability helpers and simulation utilities
Random failure rate generators, latency simulators, resource usage curves.
Makes the simulation feel realistic and non-deterministic in the right ways.
"""

import numpy as np
import math
from typing import Dict, List, Any, Optional
from datetime import datetime


_RNG = np.random.default_rng()


def set_random_seed(seed: Optional[int]) -> None:
    """Set module-level RNG seed for deterministic utility behavior."""
    global _RNG
    _RNG = np.random.default_rng(seed)


class ProbabilityHelpers:
    """Helpers for generating realistic probabilities and distributions"""
    
    @staticmethod
    def weighted_random_choice(choices: List[Any], weights: List[float]) -> Any:
        """Make a weighted random choice"""
        if not choices or not weights or len(choices) != len(weights):
            return _RNG.choice(choices) if choices else None
        
        # Normalize weights
        total_weight = sum(weights)
        if total_weight == 0:
            return _RNG.choice(choices)
        
        normalized_weights = [w / total_weight for w in weights]
        return _RNG.choice(choices, p=normalized_weights)
    
    @staticmethod
    def exponential_backoff(attempt: int, base_delay: float = 1.0, max_delay: float = 60.0) -> float:
        """Calculate exponential backoff delay"""
        delay = base_delay * (2 ** attempt)
        return min(delay, max_delay)
    
    @staticmethod
    def poisson_arrival_rate(lambda_rate: float, time_window: float) -> int:
        """Generate number of events in time window using Poisson distribution"""
        return int(_RNG.poisson(max(lambda_rate * time_window, 0)))
    
    @staticmethod
    def failure_probability_over_time(base_rate: float, time_elapsed: float, 
                                    max_rate: float = 1.0) -> float:
        """Calculate failure probability that increases over time"""
        probability = base_rate * (1 + math.log(1 + time_elapsed))
        return min(probability, max_rate)
    
    @staticmethod
    def random_failure_rate(min_rate: float = 0.1, max_rate: float = 0.9) -> float:
        """Generate a random failure rate within bounds"""
        return float(_RNG.uniform(min_rate, max_rate))


class LatencySimulator:
    """Simulates network and service latency"""
    
    def __init__(self, base_latency_ms: float = 50.0):
        self.base_latency_ms = base_latency_ms
        self.load_factor = 1.0
        
    def set_load(self, load_factor: float):
        """Set system load factor (1.0 = normal, >1.0 = overloaded)"""
        self.load_factor = max(0.1, load_factor)
        
    def get_latency(self) -> float:
        """Get simulated latency in milliseconds"""
        # Base latency + load-dependent component + random jitter
        load_latency = self.base_latency_ms * (self.load_factor - 1.0) * 2
        jitter = float(_RNG.normal(0, self.base_latency_ms * 0.1))
        latency = self.base_latency_ms + max(0, load_latency) + jitter
        return max(1.0, latency)  # Minimum 1ms latency
    
    def get_latency_with_spike(self, spike_probability: float = 0.05, 
                             spike_multiplier: float = 5.0) -> float:
        """Get latency with occasional spikes"""
        latency = self.get_latency()
        if float(_RNG.random()) < spike_probability:
            latency *= spike_multiplier
        return latency


class ResourceUsageSimulator:
    """Simulates realistic CPU and memory usage patterns"""
    
    def __init__(self):
        self.time_offset = float(_RNG.uniform(0, 2 * math.pi))
        
    def get_cpu_usage(self, base_usage: float = 0.3, 
                     variation: float = 0.2) -> float:
        """Get CPU usage as percentage (0-100)"""
        # Simulate daily patterns with some noise
        time_factor = (datetime.now().timestamp() / 3600) % 24  # Hours in day
        daily_pattern = 0.5 * math.sin(2 * math.pi * time_factor / 24) + 0.5
        
        usage = base_usage + variation * daily_pattern
        usage += float(_RNG.normal(0, 0.05))  # Noise
        return max(0.0, min(1.0, usage)) * 100  # Clamp to 0-100%
    
    def get_memory_usage(self, base_usage: float = 0.4,
                        variation: float = 0.15) -> float:
        """Get memory usage as percentage (0-100)"""
        # Memory usage tends to creep up over time (simulate leak)
        time_factor = min((datetime.now().timestamp() / 86400) % 7, 1.0)  # Weekly pattern
        leak_factor = 0.1 * time_factor  # Slow leak over week
        
        usage = base_usage + leak_factor
        usage += float(_RNG.normal(0, 0.03))  # Noise
        return max(0.0, min(1.0, usage)) * 100  # Clamp to 0-100%
    
    def get_resource_curve(self, resource_type: str, 
                          time_elapsed: float) -> float:
        """Get resource usage following a specific curve"""
        if resource_type == "cpu":
            # CPU: periodic with bursts
            return 0.3 + 0.4 * math.sin(time_elapsed / 100) + 0.2 * float(_RNG.random())
        elif resource_type == "memory":
            # Memory: gradual increase with occasional GC drops
            base = 0.2 + 0.6 * (1 - math.exp(-time_elapsed / 1000))
            gc_drop = 0.3 if float(_RNG.random()) < 0.01 else 0  # Occasional GC
            return max(0, base - gc_drop)
        elif resource_type == "disk":
            # Disk: steady growth
            return 0.1 + 0.8 * min(time_elapsed / 10000, 1.0)
        else:
            return 0.5


class NetworkSimulator:
    """Simulates network conditions and partitions"""
    
    def __init__(self):
        self.partition_probability = 0.01
        self.latency_ms = 10.0
        self.bandwidth_mbps = 1000.0
        
    def simulate_partition(self) -> bool:
        """Return True if network partition is simulated"""
        return float(_RNG.random()) < self.partition_probability
    
    def get_latency(self) -> float:
        """Get network latency in milliseconds"""
        # Base latency with occasional spikes
        latency = self.latency_ms + float(_RNG.normal(0, self.latency_ms * 0.2))
        if float(_RNG.random()) < 0.05:  # 5% chance of spike
            latency *= float(_RNG.uniform(2, 10))
        return max(1.0, latency)
    
    def get_bandwidth(self) -> float:
        """Get available bandwidth in Mbps"""
        # Bandwidth varies with usage and conditions
        usage_factor = float(_RNG.uniform(0.3, 0.9))
        condition_factor = float(_RNG.uniform(0.8, 1.2))
        return self.bandwidth_mbps * usage_factor * condition_factor


def generate_failure_scenario(config: Dict[str, Any]) -> Dict[str, Any]:
    """Generate a random failure scenario based on config"""
    scenario = {
        "type": str(_RNG.choice(["crashloop", "oom", "node_failure", "cascade"])),
        "severity": float(_RNG.uniform(0.3, 0.9)),
        "duration": int(_RNG.integers(30, 301)),  # seconds
        "affected_components": []
    }
    
    # Add specific parameters based on type
    if scenario["type"] == "crashloop":
        scenario["failure_rate"] = config.get("crash_loop_failure_rate", 0.7)
    elif scenario["type"] == "oom":
        scenario["failure_rate"] = config.get("oom_kill_failure_rate", 0.6)
    elif scenario["type"] == "node_failure":
        scenario["failure_rate"] = config.get("node_failure_rate", 0.4)
    elif scenario["type"] == "cascade":
        scenario["probability"] = config.get("cascade_failure_probability", 0.5)
    
    return scenario


def apply_realistic_noise(value: float, noise_percent: float = 10.0) -> float:
    """Apply realistic noise to a value"""
    noise = float(_RNG.normal(0, value * (noise_percent / 100.0)))
    return max(0, value + noise)