File size: 5,755 Bytes
1175c0b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 | """
Metric time-series generator.
Produces plausible metric history for services — both the healthy baseline
and the anomaly window. Used to populate metric_history on reset so the
agent sees a 30-minute lookback, not just the current point.
"""
from __future__ import annotations
import random
from typing import Dict, List
def generate_healthy_history(
minutes: int = 30,
start_minute: int = 0,
) -> List[Dict[str, float]]:
"""Generate 'minutes' worth of normal baseline metrics."""
history = []
for m in range(start_minute, start_minute + minutes):
noise = lambda: random.gauss(0, 1)
history.append({
"minute": m,
"cpu": round(max(5, min(40, 15 + noise() * 3)), 1),
"memory": round(max(20, min(55, 35 + noise() * 3)), 1),
"error_rate": round(max(0, min(2, 0.1 + abs(noise()) * 0.1)), 2),
"latency_p50": round(max(5, 12 + noise() * 2), 1),
"latency_p95": round(max(20, 45 + noise() * 5), 1),
"latency_p99": round(max(50, 120 + noise() * 10), 1),
"rps": round(max(200, 500 + noise() * 30), 1),
})
return history
def generate_memory_leak_history(
minutes: int = 30,
start_minute: int = 0,
leak_start_offset: int = 10,
rate: float = 1.5,
) -> List[Dict[str, float]]:
"""
Generate metric history with a memory leak starting partway through.
First 'leak_start_offset' minutes are normal, then memory climbs.
"""
history = []
mem = 35.0
for m in range(start_minute, start_minute + minutes):
noise = lambda: random.gauss(0, 1)
elapsed = m - start_minute
if elapsed >= leak_start_offset:
mem = min(99.0, mem + rate + noise() * 0.3)
cpu = min(95, 15 + (elapsed - leak_start_offset) * 0.3 + noise() * 2)
error_rate = max(0, min(100, (mem - 75) * 2 + noise() * 2)) if mem > 75 else 0.1
lat_p95 = max(45, 45 + (mem - 70) * 8 + noise() * 10) if mem > 70 else 45 + noise() * 5
lat_p99 = max(120, lat_p95 * 2.5 + noise() * 20)
else:
cpu = max(5, min(40, 15 + noise() * 3))
mem = max(20, min(55, 35 + noise() * 3))
error_rate = max(0, 0.1 + abs(noise()) * 0.1)
lat_p95 = max(20, 45 + noise() * 5)
lat_p99 = max(50, 120 + noise() * 10)
history.append({
"minute": m,
"cpu": round(cpu, 1),
"memory": round(mem, 1),
"error_rate": round(error_rate, 2),
"latency_p50": round(max(5, 12 + noise() * 2), 1),
"latency_p95": round(lat_p95, 1),
"latency_p99": round(lat_p99, 1),
"rps": round(max(100, 500 - max(0, mem - 80) * 15 + noise() * 20), 1),
})
return history
def generate_error_spike_history(
minutes: int = 30,
start_minute: int = 0,
spike_start_offset: int = 5,
error_rate_target: float = 60.0,
) -> List[Dict[str, float]]:
"""Metric history where error rate jumps suddenly (e.g. bad config push)."""
history = []
for m in range(start_minute, start_minute + minutes):
noise = lambda: random.gauss(0, 1)
elapsed = m - start_minute
if elapsed >= spike_start_offset:
error_rate = min(100, error_rate_target + noise() * 5)
lat_p95 = max(100, 500 + noise() * 50)
lat_p99 = max(200, 1500 + noise() * 100)
cpu = max(5, min(80, 40 + noise() * 5))
else:
error_rate = max(0, 0.1 + abs(noise()) * 0.1)
lat_p95 = max(20, 45 + noise() * 5)
lat_p99 = max(50, 120 + noise() * 10)
cpu = max(5, min(40, 15 + noise() * 3))
history.append({
"minute": m,
"cpu": round(cpu, 1),
"memory": round(max(20, min(55, 35 + noise() * 3)), 1),
"error_rate": round(error_rate, 2),
"latency_p50": round(max(5, 12 + noise() * 2), 1),
"latency_p95": round(lat_p95, 1),
"latency_p99": round(lat_p99, 1),
"rps": round(max(100, 500 - error_rate * 3 + noise() * 20), 1),
})
return history
def generate_high_latency_history(
minutes: int = 30,
start_minute: int = 0,
latency_start_offset: int = 8,
target_p99: float = 8000,
) -> List[Dict[str, float]]:
"""Metric history with gradually increasing latency (deadlock/contention)."""
history = []
for m in range(start_minute, start_minute + minutes):
noise = lambda: random.gauss(0, 1)
elapsed = m - start_minute
if elapsed >= latency_start_offset:
progress = min(1.0, (elapsed - latency_start_offset) / 15)
lat_p50 = max(12, 12 + progress * 400 + noise() * 20)
lat_p95 = max(45, 45 + progress * target_p99 * 0.6 + noise() * 80)
lat_p99 = max(120, 120 + progress * target_p99 + noise() * 200)
error_rate = max(0, progress * 15 + noise() * 2)
rps = max(20, 500 * (1 - progress * 0.7) + noise() * 15)
else:
lat_p50 = max(5, 12 + noise() * 2)
lat_p95 = max(20, 45 + noise() * 5)
lat_p99 = max(50, 120 + noise() * 10)
error_rate = max(0, 0.1 + abs(noise()) * 0.1)
rps = max(200, 500 + noise() * 30)
history.append({
"minute": m,
"cpu": round(max(5, min(60, 15 + noise() * 3)), 1),
"memory": round(max(20, min(55, 35 + noise() * 3)), 1),
"error_rate": round(error_rate, 2),
"latency_p50": round(lat_p50, 1),
"latency_p95": round(lat_p95, 1),
"latency_p99": round(lat_p99, 1),
"rps": round(rps, 1),
})
return history
|