Spaces:
Configuration error
Configuration error
"""
Generate a synthetic Alibaba-style cluster trace CSV.

Produces multimodal traffic patterns mimicking the characteristics
reported in the Alibaba microservices-v2021 SoCC'21 paper:
- Diurnal cycle with morning/evening peaks
- Random micro-bursts (5-12 step duration)
- Silent "maintenance windows" with near-zero traffic
- Per-node CPU variance correlated with request rate

Output: server/traces/alibaba_v2021_8node_500steps.csv
(the number of rows written is set by NUM_STEPS, not by the "500steps"
in the file name).
"""
import csv
import math
import os
import random
from typing import Optional
# Trace dimensions: number of simulated nodes (one cpu/mem column pair each)
# and number of time steps (one CSV row each).
NUM_NODES = 8
NUM_STEPS = 10_000

# The CSV is written into a "traces" directory next to this script.
# NOTE(review): the file name says "500steps" while NUM_STEPS is 10000 —
# confirm which one is intended before renaming; consumers may depend on
# the current file name.
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), "traces")
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "alibaba_v2021_8node_500steps.csv")
def generate_trace(
    seed: int = 2021,
    *,
    num_steps: Optional[int] = None,
    num_nodes: Optional[int] = None,
    output_file: Optional[str] = None,
) -> None:
    """Write a seeded, synthetic cluster-load trace to a CSV file.

    Each row holds the step index, a ``cpu``/``mem`` pair per node, the
    cluster-wide ``request_rate`` and a ``latency_injection`` value. All
    randomness comes from a private ``random.Random(seed)``, so the same
    arguments always produce the same file.

    Args:
        seed: Seed for the private random generator.
        num_steps: Number of rows to generate. Defaults to the module-level
            ``NUM_STEPS``. Must be at least 80 so that the burst and silent
            windows can be placed.
        num_nodes: Number of simulated nodes. Defaults to the module-level
            ``NUM_NODES``. Must be at least 1.
        output_file: Destination CSV path. Defaults to the module-level
            ``OUTPUT_FILE``. Missing parent directories are created.

    Raises:
        ValueError: If ``num_steps`` is below 80 or ``num_nodes`` is below 1.
    """
    # Resolve defaults lazily so explicit arguments never touch the globals.
    steps = NUM_STEPS if num_steps is None else num_steps
    nodes = NUM_NODES if num_nodes is None else num_nodes
    out_path = OUTPUT_FILE if output_file is None else output_file

    # Silent windows start in [50, steps - 30]; anything shorter than 80
    # steps would make that range empty and fail inside randint().
    min_steps = 80
    if steps < min_steps:
        raise ValueError(
            f"num_steps must be at least {min_steps}, got {steps}"
        )
    if nodes < 1:
        raise ValueError(f"num_nodes must be at least 1, got {nodes}")

    rng = random.Random(seed)

    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # NOTE: the order of rng calls below determines the generated data for
    # a given seed; do not reorder them.

    # Pre-compute micro-burst windows as (start, end_exclusive, intensity).
    bursts: list[tuple[int, int, float]] = []
    for _ in range(12):
        start = rng.randint(30, steps - 20)
        duration = rng.randint(5, 12)
        intensity = rng.uniform(2.5, 6.0)
        bursts.append((start, start + duration, intensity))

    # Pre-compute silent windows as (start, end_exclusive).
    silents: list[tuple[int, int]] = []
    for _ in range(4):
        start = rng.randint(50, steps - 30)
        duration = rng.randint(8, 20)
        silents.append((start, start + duration))

    # Node personality: each node has a base CPU offset and a sensitivity
    # to the cluster-wide load.
    node_base_cpu = [0.15 + rng.uniform(-0.05, 0.05) for _ in range(nodes)]
    node_sensitivity = [0.4 + rng.uniform(-0.1, 0.15) for _ in range(nodes)]

    headers = ["step"]
    for i in range(nodes):
        headers.extend([f"node_{i}_cpu", f"node_{i}_mem"])
    headers.extend(["request_rate", "latency_injection"])

    base_rate = 100.0
    rows = []
    for step in range(steps):
        t = step / steps

        # Diurnal level: a fundamental plus a second harmonic spanning the
        # whole trace. It starts at 0.2, flattens briefly at t = 0.25,
        # crests at about 0.89 near t = 0.58 and bottoms out at about 0.11
        # near t = 0.92.
        diurnal = (
            0.5
            + 0.3 * math.sin(2 * math.pi * t - math.pi / 2)
            + 0.15 * math.sin(4 * math.pi * t)
        )

        # Micro-bursts: the strongest active burst wins. Each burst is
        # bell-shaped around the middle of its window and never lowers the
        # multiplier below 1.0.
        burst_mult = 1.0
        for b_start, b_end, b_intensity in bursts:
            if b_start <= step < b_end:
                mid = (b_start + b_end) / 2.0
                dist = abs(step - mid) / max(1, (b_end - b_start) / 2.0)
                burst_mult = max(
                    burst_mult, b_intensity * math.exp(-dist * dist)
                )

        is_silent = any(s_start <= step < s_end for s_start, s_end in silents)

        # Request rate: silent windows ignore the diurnal level and bursts.
        if is_silent:
            request_rate = base_rate * rng.uniform(0.05, 0.15)
        else:
            request_rate = base_rate * diurnal * burst_mult
        # Gaussian noise with a standard deviation of 8% of the rate, then
        # a floor of 10.0 (which also applies inside silent windows).
        request_rate += rng.gauss(0, request_rate * 0.08)
        request_rate = max(10.0, request_rate)

        # Per-node CPU and memory. The load factor is the same for every
        # node in a step, so it is computed once.
        load_factor = request_rate / (base_rate * 1.5)
        node_data: list[float] = []
        for i in range(nodes):
            cpu = (
                node_base_cpu[i]
                + node_sensitivity[i] * load_factor
                + rng.gauss(0, 0.03)
            )
            # Occasional per-node anomaly (2% chance per node and step).
            if rng.random() < 0.02:
                cpu += rng.uniform(0.15, 0.35)
            cpu = max(0.02, min(0.99, cpu))

            # Memory is a linear function of the clamped CPU plus noise.
            mem = 0.3 + cpu * 0.4 + rng.gauss(0, 0.02)
            mem = max(0.05, min(0.95, mem))

            node_data.extend([round(cpu, 4), round(mem, 4)])

        # Latency injection: small in silent windows, large during strong
        # bursts, otherwise scaled by the diurnal level.
        if is_silent:
            latency_inj = rng.uniform(0, 3)
        elif burst_mult > 2.0:
            latency_inj = rng.uniform(15, 60) * (burst_mult / 3.0)
        else:
            latency_inj = rng.uniform(0, 12) * diurnal

        rows.append(
            [step] + node_data + [round(request_rate, 2), round(latency_inj, 2)]
        )

    # newline="" lets the csv module control line endings itself.
    with open(out_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(rows)

    print(f"Generated trace: {out_path} ({steps} steps, {nodes} nodes)")
# Script entry point: when this file is executed directly (not imported),
# generate the trace using generate_trace()'s default arguments.
if __name__ == "__main__":
    generate_trace()