DIME / server /generate_trace.py
Naseer-010's picture
Update codebase and ignore DIME.pdf and image_gen binaries
facabc7
"""
Generate a synthetic Alibaba-style cluster trace CSV.
Produces multimodal traffic patterns mimicking the characteristics
reported in the Alibaba microservices-v2021 SoCC'21 paper:
- Diurnal cycle with morning/evening peaks
- Random micro-bursts (5-12 step duration)
- Silent "maintenance windows" with near-zero traffic
- Per-node CPU variance correlated with request rate
Output: server/traces/alibaba_v2021_8node_500steps.csv
(the file name is fixed; the trace length is set by NUM_STEPS, not by the "500steps" suffix)
"""
import csv
import math
import os
import random
from typing import Optional
# Shape of the generated trace: one CPU and one memory column per node,
# one row per step.
NUM_NODES = 8
NUM_STEPS = 10_000

# The trace is written to a "traces" directory next to this module.
_MODULE_DIR = os.path.dirname(__file__)
OUTPUT_DIR = os.path.join(_MODULE_DIR, "traces")

# NOTE: the "500steps" suffix in the file name does not track NUM_STEPS;
# the name is kept as-is so the output path stays stable.
_TRACE_FILENAME = "alibaba_v2021_8node_500steps.csv"
OUTPUT_FILE = os.path.join(OUTPUT_DIR, _TRACE_FILENAME)
def _diurnal_level(t: float) -> float:
    """Return the slow "time of day" load multiplier for normalised time ``t``.

    The value is a constant 0.5 plus a fundamental and a second harmonic.
    Over ``t`` in [0, 1) it stays strictly positive (roughly 0.11 to 0.89):
    it rises from 0.2 at ``t = 0``, flattens into a shoulder around
    ``t = 0.25``, reaches its maximum near ``t = 0.58`` and its minimum near
    ``t = 0.92``.
    """
    return (
        0.5
        + 0.3 * math.sin(2 * math.pi * t - math.pi / 2)
        + 0.15 * math.sin(4 * math.pi * t)
    )


def _burst_multiplier(step: int, bursts: list) -> float:
    """Return the traffic multiplier contributed by micro-bursts at ``step``.

    ``bursts`` holds ``(start, end, intensity)`` tuples with ``end`` exclusive.
    Inside a burst the intensity is shaped by a Gaussian bump centred on the
    middle of the window; overlapping bursts do not stack, the strongest one
    wins. The result is never below 1.0 (no burst, or a weak burst edge).
    """
    multiplier = 1.0
    for b_start, b_end, b_intensity in bursts:
        if b_start <= step < b_end:
            mid = (b_start + b_end) / 2.0
            # Distance from the centre in units of the half-width; max(1, ...)
            # guards against a degenerate (very short) window.
            dist = abs(step - mid) / max(1, (b_end - b_start) / 2.0)
            multiplier = max(multiplier, b_intensity * math.exp(-dist * dist))
    return multiplier


def generate_trace(
    seed: int = 2021,
    *,
    num_steps: Optional[int] = None,
    num_nodes: Optional[int] = None,
    output_file: Optional[str] = None,
) -> None:
    """Generate a synthetic cluster trace and write it to a CSV file.

    Each row holds the step index, a ``node_<i>_cpu`` / ``node_<i>_mem`` pair
    per node, the cluster-wide ``request_rate`` and a ``latency_injection``
    value. All randomness comes from a private ``random.Random(seed)``, so the
    same arguments always produce the same file.

    Args:
        seed: Seed for the private random generator.
        num_steps: Number of rows to generate; defaults to ``NUM_STEPS``.
        num_nodes: Number of nodes (CPU/memory column pairs); defaults to
            ``NUM_NODES``.
        output_file: Destination CSV path; defaults to ``OUTPUT_FILE``. The
            parent directory is created if it does not exist.

    Raises:
        ValueError: If ``num_steps`` or ``num_nodes`` is negative.
    """
    # Resolve defaults lazily so the module constants stay the single source
    # of truth for the default trace.
    if num_steps is None:
        num_steps = NUM_STEPS
    if num_nodes is None:
        num_nodes = NUM_NODES
    if output_file is None:
        output_file = OUTPUT_FILE
    if num_steps < 0 or num_nodes < 0:
        raise ValueError("num_steps and num_nodes must be non-negative")

    rng = random.Random(seed)
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # NOTE: the order of rng calls below determines the generated values;
    # keep it stable so existing seeds keep reproducing the same trace.

    # Pre-compute micro-burst windows as (start, end_exclusive, intensity).
    # The upper bound is clamped so short traces do not hand randint an
    # empty range (which raises ValueError).
    bursts: list[tuple[int, int, float]] = []
    for _ in range(12):
        start = rng.randint(30, max(30, num_steps - 20))
        duration = rng.randint(5, 12)
        intensity = rng.uniform(2.5, 6.0)
        bursts.append((start, start + duration, intensity))

    # Pre-compute silent windows as (start, end_exclusive).
    silents: list[tuple[int, int]] = []
    for _ in range(4):
        start = rng.randint(50, max(50, num_steps - 30))
        duration = rng.randint(8, 20)
        silents.append((start, start + duration))

    # Node personality: each node has its own idle CPU level and its own
    # sensitivity to the cluster-wide load.
    node_base_cpu = [0.15 + rng.uniform(-0.05, 0.05) for _ in range(num_nodes)]
    node_sensitivity = [0.4 + rng.uniform(-0.1, 0.15) for _ in range(num_nodes)]

    headers = ["step"]
    for i in range(num_nodes):
        headers.extend([f"node_{i}_cpu", f"node_{i}_mem"])
    headers.extend(["request_rate", "latency_injection"])

    base_rate = 100.0
    rows = []
    for step in range(num_steps):
        # Normalised position within the trace, in [0, 1).
        diurnal = _diurnal_level(step / num_steps)
        burst_mult = _burst_multiplier(step, bursts)
        is_silent = any(s_start <= step < s_end for s_start, s_end in silents)

        # --- Request rate ---
        # Silent windows override both the diurnal level and any burst.
        if is_silent:
            request_rate = base_rate * rng.uniform(0.05, 0.15)
        else:
            request_rate = base_rate * diurnal * burst_mult
        # Gaussian noise with a standard deviation of 8% of the current
        # rate, then a floor so the rate never drops below 10.
        request_rate += rng.gauss(0, request_rate * 0.08)
        request_rate = max(10.0, request_rate)

        # --- Per-node CPU and memory ---
        # The load factor is the same for every node in a step.
        load_factor = request_rate / (base_rate * 1.5)
        node_data: list[float] = []
        for i in range(num_nodes):
            cpu = (
                node_base_cpu[i]
                + node_sensitivity[i] * load_factor
                + rng.gauss(0, 0.03)
            )
            # Occasional per-node anomaly (simulates GC pauses, log rotation).
            if rng.random() < 0.02:
                cpu += rng.uniform(0.15, 0.35)
            cpu = max(0.02, min(0.99, cpu))

            # Memory is an affine function of CPU plus small Gaussian noise.
            mem = 0.3 + cpu * 0.4 + rng.gauss(0, 0.02)
            mem = max(0.05, min(0.95, mem))

            node_data.extend([round(cpu, 4), round(mem, 4)])

        # --- Latency injection: extra latency from trace ---
        if is_silent:
            latency_inj = rng.uniform(0, 3)
        elif burst_mult > 2.0:
            latency_inj = rng.uniform(15, 60) * (burst_mult / 3.0)
        else:
            latency_inj = rng.uniform(0, 12) * diurnal

        rows.append(
            [step] + node_data + [round(request_rate, 2), round(latency_inj, 2)]
        )

    # The file is only opened once every row has been generated, so a failure
    # during generation never leaves a truncated trace behind.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(rows)

    print(f"Generated trace: {output_file} ({num_steps} steps, {num_nodes} nodes)")
# Script entry point: running this file directly writes the trace using the
# default seed; merely importing the module does not generate anything.
if __name__ == "__main__":
    generate_trace()